Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d59b0373
Commit
d59b0373
authored
Feb 08, 2018
by
peastman
Browse files
Convert OpenCLArray to use RAII
parent
44daf269
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
705 additions
and
801 deletions
+705
-801
platforms/opencl/include/OpenCLArray.h
platforms/opencl/include/OpenCLArray.h
+68
-3
platforms/opencl/include/OpenCLCompact.h
platforms/opencl/include/OpenCLCompact.h
+1
-0
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+22
-22
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+91
-105
platforms/opencl/include/OpenCLSort.h
platforms/opencl/include/OpenCLSort.h
+1
-0
platforms/opencl/src/OpenCLArray.cpp
platforms/opencl/src/OpenCLArray.cpp
+53
-11
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+70
-93
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+399
-567
No files found.
platforms/opencl/include/OpenCLArray.h
View file @
d59b0373
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -27,15 +27,19 @@
...
@@ -27,15 +27,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "OpenCLContext.h"
#define __CL_ENABLE_EXCEPTIONS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
#include "windowsExportOpenCL.h"
#include "windowsExportOpenCL.h"
#include <cl.hpp>
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include <vector>
#include <vector>
namespace
OpenMM
{
namespace
OpenMM
{
class
OpenCLContext
;
/**
/**
* This class encapsulates an OpenCL Buffer. It provides a simplified API for working with it,
* This class encapsulates an OpenCL Buffer. It provides a simplified API for working with it,
* and for copying data to and from the OpenCL Buffer.
* and for copying data to and from the OpenCL Buffer.
...
@@ -69,6 +73,11 @@ public:
...
@@ -69,6 +73,11 @@ public:
static
OpenCLArray
*
create
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
const
std
::
string
&
name
)
{
static
OpenCLArray
*
create
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
const
std
::
string
&
name
)
{
return
new
OpenCLArray
(
context
,
buffer
,
size
,
sizeof
(
T
),
name
);
return
new
OpenCLArray
(
context
,
buffer
,
size
,
sizeof
(
T
),
name
);
}
}
/**
* Create an uninitialized OpenCLArray object. It does not point to any OpenCL Buffer,
* and cannot be used until initialize() is called on it.
*/
OpenCLArray
();
/**
/**
* Create an OpenCLArray object.
* Create an OpenCLArray object.
*
*
...
@@ -90,6 +99,61 @@ public:
...
@@ -90,6 +99,61 @@ public:
*/
*/
OpenCLArray
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
OpenCLArray
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
~
OpenCLArray
();
~
OpenCLArray
();
/**
* Initialize this object.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
void
initialize
(
OpenCLContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
,
cl_int
flags
=
CL_MEM_READ_WRITE
);
/**
* Initialize this object to use a preexisting Buffer.
*
* @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void
initialize
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
);
/**
* Initialize this object. The template argument is the data type of each array element.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
template
<
class
T
>
void
initialize
(
OpenCLContext
&
context
,
int
size
,
const
std
::
string
&
name
,
cl_int
flags
=
CL_MEM_READ_WRITE
)
{
initialize
(
context
,
size
,
sizeof
(
T
),
name
,
flags
);
}
/**
* Initialize this object to use a preexisting Buffer. The template argument
* is the data type of each array element.
*
* @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array
* @param name the name of the array
*/
template
<
class
T
>
void
initialize
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
const
std
::
string
&
name
)
{
initialize
(
context
,
buffer
,
size
,
sizeof
(
T
),
name
);
}
/**
* Recreate the internal storage to have a different size.
*/
void
resize
(
int
size
);
/**
* Get whether this array has been initialized.
*/
bool
isInitialized
()
const
{
return
(
buffer
!=
NULL
);
}
/**
/**
* Get the size of the array.
* Get the size of the array.
*/
*/
...
@@ -155,9 +219,10 @@ public:
...
@@ -155,9 +219,10 @@ public:
*/
*/
void
copyTo
(
OpenCLArray
&
dest
)
const
;
void
copyTo
(
OpenCLArray
&
dest
)
const
;
private:
private:
OpenCLContext
&
context
;
OpenCLContext
*
context
;
cl
::
Buffer
*
buffer
;
cl
::
Buffer
*
buffer
;
int
size
,
elementSize
;
int
size
,
elementSize
;
cl_int
flags
;
bool
ownsBuffer
;
bool
ownsBuffer
;
std
::
string
name
;
std
::
string
name
;
};
};
...
...
platforms/opencl/include/OpenCLCompact.h
View file @
d59b0373
...
@@ -26,6 +26,7 @@
...
@@ -26,6 +26,7 @@
*/
*/
#include "OpenCLArray.h"
#include "OpenCLArray.h"
#include "OpenCLContext.h"
namespace
OpenMM
{
namespace
OpenMM
{
...
...
platforms/opencl/include/OpenCLContext.h
View file @
d59b0373
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -39,11 +39,11 @@
...
@@ -39,11 +39,11 @@
#include <pthread.h>
#include <pthread.h>
#include <cl.hpp>
#include <cl.hpp>
#include "windowsExportOpenCL.h"
#include "windowsExportOpenCL.h"
#include "OpenCLArray.h"
#include "OpenCLPlatform.h"
#include "OpenCLPlatform.h"
namespace
OpenMM
{
namespace
OpenMM
{
class
OpenCLArray
;
class
OpenCLForceInfo
;
class
OpenCLForceInfo
;
class
OpenCLIntegrationUtilities
;
class
OpenCLIntegrationUtilities
;
class
OpenCLExpressionUtilities
;
class
OpenCLExpressionUtilities
;
...
@@ -227,49 +227,49 @@ public:
...
@@ -227,49 +227,49 @@ public:
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/
*/
OpenCLArray
&
getPosq
()
{
OpenCLArray
&
getPosq
()
{
return
*
posq
;
return
posq
;
}
}
/**
/**
* Get the array which contains a correction to the position of each atom. This only exists if getUseMixedPrecision() returns true.
* Get the array which contains a correction to the position of each atom. This only exists if getUseMixedPrecision() returns true.
*/
*/
OpenCLArray
&
getPosqCorrection
()
{
OpenCLArray
&
getPosqCorrection
()
{
return
*
posqCorrection
;
return
posqCorrection
;
}
}
/**
/**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/
*/
OpenCLArray
&
getVelm
()
{
OpenCLArray
&
getVelm
()
{
return
*
velm
;
return
velm
;
}
}
/**
/**
* Get the array which contains the force on each atom.
* Get the array which contains the force on each atom.
*/
*/
OpenCLArray
&
getForce
()
{
OpenCLArray
&
getForce
()
{
return
*
force
;
return
force
;
}
}
/**
/**
* Get the array which contains the buffers in which forces are computed.
* Get the array which contains the buffers in which forces are computed.
*/
*/
OpenCLArray
&
getForceBuffers
()
{
OpenCLArray
&
getForceBuffers
()
{
return
*
forceBuffers
;
return
forceBuffers
;
}
}
/**
/**
* Get the array which contains a contribution to each force represented as 64 bit fixed point.
* Get the array which contains a contribution to each force represented as 64 bit fixed point.
*/
*/
OpenCLArray
&
getLongForceBuffer
()
{
OpenCLArray
&
getLongForceBuffer
()
{
return
*
longForceBuffer
;
return
longForceBuffer
;
}
}
/**
/**
* Get the array which contains the buffer in which energy is computed.
* Get the array which contains the buffer in which energy is computed.
*/
*/
OpenCLArray
&
getEnergyBuffer
()
{
OpenCLArray
&
getEnergyBuffer
()
{
return
*
energyBuffer
;
return
energyBuffer
;
}
}
/**
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
*/
OpenCLArray
&
getEnergyParamDerivBuffer
()
{
OpenCLArray
&
getEnergyParamDerivBuffer
()
{
return
*
energyParamDerivBuffer
;
return
energyParamDerivBuffer
;
}
}
/**
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
...
@@ -288,7 +288,7 @@ public:
...
@@ -288,7 +288,7 @@ public:
* Get the array which contains the index of each atom.
* Get the array which contains the index of each atom.
*/
*/
OpenCLArray
&
getAtomIndexArray
()
{
OpenCLArray
&
getAtomIndexArray
()
{
return
*
atomIndexDevice
;
return
atomIndexDevice
;
}
}
/**
/**
* Get the number of cells by which the positions are offset.
* Get the number of cells by which the positions are offset.
...
@@ -762,17 +762,17 @@ private:
...
@@ -762,17 +762,17 @@ private:
std
::
vector
<
mm_int4
>
posCellOffsets
;
std
::
vector
<
mm_int4
>
posCellOffsets
;
cl
::
Buffer
*
pinnedBuffer
;
cl
::
Buffer
*
pinnedBuffer
;
void
*
pinnedMemory
;
void
*
pinnedMemory
;
OpenCLArray
*
posq
;
OpenCLArray
posq
;
OpenCLArray
*
posqCorrection
;
OpenCLArray
posqCorrection
;
OpenCLArray
*
velm
;
OpenCLArray
velm
;
OpenCLArray
*
force
;
OpenCLArray
force
;
OpenCLArray
*
forceBuffers
;
OpenCLArray
forceBuffers
;
OpenCLArray
*
longForceBuffer
;
OpenCLArray
longForceBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
energyBuffer
;
OpenCLArray
*
energySum
;
OpenCLArray
energySum
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
energyParamDerivBuffer
;
OpenCLArray
*
atomIndexDevice
;
OpenCLArray
atomIndexDevice
;
OpenCLArray
*
chargeBuffer
;
OpenCLArray
chargeBuffer
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
vector
<
int
>
atomIndex
;
std
::
vector
<
int
>
atomIndex
;
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
d59b0373
...
@@ -243,9 +243,8 @@ private:
...
@@ -243,9 +243,8 @@ private:
class
OpenCLCalcHarmonicBondForceKernel
:
public
CalcHarmonicBondForceKernel
{
class
OpenCLCalcHarmonicBondForceKernel
:
public
CalcHarmonicBondForceKernel
{
public:
public:
OpenCLCalcHarmonicBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcHarmonicBondForceKernel
(
name
,
platform
),
OpenCLCalcHarmonicBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcHarmonicBondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
}
~
OpenCLCalcHarmonicBondForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -276,7 +275,7 @@ private:
...
@@ -276,7 +275,7 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
params
;
};
};
/**
/**
...
@@ -285,7 +284,7 @@ private:
...
@@ -285,7 +284,7 @@ private:
class
OpenCLCalcCustomBondForceKernel
:
public
CalcCustomBondForceKernel
{
class
OpenCLCalcCustomBondForceKernel
:
public
CalcCustomBondForceKernel
{
public:
public:
OpenCLCalcCustomBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomBondForceKernel
(
name
,
platform
),
OpenCLCalcCustomBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomBondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
{
}
}
~
OpenCLCalcCustomBondForceKernel
();
~
OpenCLCalcCustomBondForceKernel
();
/**
/**
...
@@ -319,7 +318,7 @@ private:
...
@@ -319,7 +318,7 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
};
};
...
@@ -330,9 +329,8 @@ private:
...
@@ -330,9 +329,8 @@ private:
class
OpenCLCalcHarmonicAngleForceKernel
:
public
CalcHarmonicAngleForceKernel
{
class
OpenCLCalcHarmonicAngleForceKernel
:
public
CalcHarmonicAngleForceKernel
{
public:
public:
OpenCLCalcHarmonicAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcHarmonicAngleForceKernel
(
name
,
platform
),
OpenCLCalcHarmonicAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcHarmonicAngleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
}
~
OpenCLCalcHarmonicAngleForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -363,7 +361,7 @@ private:
...
@@ -363,7 +361,7 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
params
;
};
};
/**
/**
...
@@ -372,7 +370,7 @@ private:
...
@@ -372,7 +370,7 @@ private:
class
OpenCLCalcCustomAngleForceKernel
:
public
CalcCustomAngleForceKernel
{
class
OpenCLCalcCustomAngleForceKernel
:
public
CalcCustomAngleForceKernel
{
public:
public:
OpenCLCalcCustomAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomAngleForceKernel
(
name
,
platform
),
OpenCLCalcCustomAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomAngleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
{
}
}
~
OpenCLCalcCustomAngleForceKernel
();
~
OpenCLCalcCustomAngleForceKernel
();
/**
/**
...
@@ -406,7 +404,7 @@ private:
...
@@ -406,7 +404,7 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
};
};
...
@@ -417,9 +415,8 @@ private:
...
@@ -417,9 +415,8 @@ private:
class
OpenCLCalcPeriodicTorsionForceKernel
:
public
CalcPeriodicTorsionForceKernel
{
class
OpenCLCalcPeriodicTorsionForceKernel
:
public
CalcPeriodicTorsionForceKernel
{
public:
public:
OpenCLCalcPeriodicTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcPeriodicTorsionForceKernel
(
name
,
platform
),
OpenCLCalcPeriodicTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcPeriodicTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
}
~
OpenCLCalcPeriodicTorsionForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -450,7 +447,7 @@ private:
...
@@ -450,7 +447,7 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
params
;
};
};
/**
/**
...
@@ -459,9 +456,8 @@ private:
...
@@ -459,9 +456,8 @@ private:
class
OpenCLCalcRBTorsionForceKernel
:
public
CalcRBTorsionForceKernel
{
class
OpenCLCalcRBTorsionForceKernel
:
public
CalcRBTorsionForceKernel
{
public:
public:
OpenCLCalcRBTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcRBTorsionForceKernel
(
name
,
platform
),
OpenCLCalcRBTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcRBTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
}
~
OpenCLCalcRBTorsionForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -492,7 +488,7 @@ private:
...
@@ -492,7 +488,7 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLArray
*
params
;
OpenCLArray
params
;
};
};
/**
/**
...
@@ -501,9 +497,8 @@ private:
...
@@ -501,9 +497,8 @@ private:
class
OpenCLCalcCMAPTorsionForceKernel
:
public
CalcCMAPTorsionForceKernel
{
class
OpenCLCalcCMAPTorsionForceKernel
:
public
CalcCMAPTorsionForceKernel
{
public:
public:
OpenCLCalcCMAPTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCMAPTorsionForceKernel
(
name
,
platform
),
OpenCLCalcCMAPTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCMAPTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
,
coefficients
(
NULL
),
mapPositions
(
NULL
),
torsionMaps
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
)
{
}
}
~
OpenCLCalcCMAPTorsionForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -535,9 +530,9 @@ private:
...
@@ -535,9 +530,9 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
std
::
vector
<
mm_int2
>
mapPositionsVec
;
std
::
vector
<
mm_int2
>
mapPositionsVec
;
OpenCLArray
*
coefficients
;
OpenCLArray
coefficients
;
OpenCLArray
*
mapPositions
;
OpenCLArray
mapPositions
;
OpenCLArray
*
torsionMaps
;
OpenCLArray
torsionMaps
;
};
};
/**
/**
...
@@ -546,7 +541,7 @@ private:
...
@@ -546,7 +541,7 @@ private:
class
OpenCLCalcCustomTorsionForceKernel
:
public
CalcCustomTorsionForceKernel
{
class
OpenCLCalcCustomTorsionForceKernel
:
public
CalcCustomTorsionForceKernel
{
public:
public:
OpenCLCalcCustomTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomTorsionForceKernel
(
name
,
platform
),
OpenCLCalcCustomTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
{
}
}
~
OpenCLCalcCustomTorsionForceKernel
();
~
OpenCLCalcCustomTorsionForceKernel
();
/**
/**
...
@@ -580,7 +575,7 @@ private:
...
@@ -580,7 +575,7 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
};
};
...
@@ -591,10 +586,7 @@ private:
...
@@ -591,10 +586,7 @@ private:
class
OpenCLCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
class
OpenCLCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
dispersionFft
(
NULL
),
pmeio
(
NULL
)
{
}
}
~
OpenCLCalcNonbondedForceKernel
();
~
OpenCLCalcNonbondedForceKernel
();
/**
/**
...
@@ -660,21 +652,21 @@ private:
...
@@ -660,21 +652,21 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
ForceInfo
*
info
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
OpenCLArray
*
sigmaEpsilon
;
OpenCLArray
sigmaEpsilon
;
OpenCLArray
*
exceptionParams
;
OpenCLArray
exceptionParams
;
OpenCLArray
*
cosSinSums
;
OpenCLArray
cosSinSums
;
OpenCLArray
*
pmeGrid
;
OpenCLArray
pmeGrid
;
OpenCLArray
*
pmeGrid2
;
OpenCLArray
pmeGrid2
;
OpenCLArray
*
pmeBsplineModuliX
;
OpenCLArray
pmeBsplineModuliX
;
OpenCLArray
*
pmeBsplineModuliY
;
OpenCLArray
pmeBsplineModuliY
;
OpenCLArray
*
pmeBsplineModuliZ
;
OpenCLArray
pmeBsplineModuliZ
;
OpenCLArray
*
pmeDispersionBsplineModuliX
;
OpenCLArray
pmeDispersionBsplineModuliX
;
OpenCLArray
*
pmeDispersionBsplineModuliY
;
OpenCLArray
pmeDispersionBsplineModuliY
;
OpenCLArray
*
pmeDispersionBsplineModuliZ
;
OpenCLArray
pmeDispersionBsplineModuliZ
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
pmeBsplineTheta
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
pmeAtomRange
;
OpenCLArray
*
pmeAtomGridIndex
;
OpenCLArray
pmeAtomGridIndex
;
OpenCLArray
*
pmeEnergyBuffer
;
OpenCLArray
pmeEnergyBuffer
;
OpenCLSort
*
sort
;
OpenCLSort
*
sort
;
cl
::
CommandQueue
pmeQueue
;
cl
::
CommandQueue
pmeQueue
;
cl
::
Event
pmeSyncEvent
;
cl
::
Event
pmeSyncEvent
;
...
@@ -717,7 +709,7 @@ private:
...
@@ -717,7 +709,7 @@ private:
class
OpenCLCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
class
OpenCLCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
public:
public:
OpenCLCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
OpenCLCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
interactionGroupData
(
NULL
),
forceCopy
(
NULL
),
system
(
system
),
hasInitializedKernel
(
false
)
{
cl
(
cl
),
params
(
NULL
),
forceCopy
(
NULL
),
system
(
system
),
hasInitializedKernel
(
false
)
{
}
}
~
OpenCLCalcCustomNonbondedForceKernel
();
~
OpenCLCalcCustomNonbondedForceKernel
();
/**
/**
...
@@ -749,13 +741,13 @@ private:
...
@@ -749,13 +741,13 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
OpenCLArray
*
interactionGroupData
;
OpenCLArray
interactionGroupData
;
cl
::
Kernel
interactionGroupKernel
;
cl
::
Kernel
interactionGroupKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
double
longRangeCoefficient
;
double
longRangeCoefficient
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
;
...
@@ -770,10 +762,8 @@ private:
...
@@ -770,10 +762,8 @@ private:
class
OpenCLCalcGBSAOBCForceKernel
:
public
CalcGBSAOBCForceKernel
{
class
OpenCLCalcGBSAOBCForceKernel
:
public
CalcGBSAOBCForceKernel
{
public:
public:
OpenCLCalcGBSAOBCForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcGBSAOBCForceKernel
(
name
,
platform
),
cl
(
cl
),
OpenCLCalcGBSAOBCForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcGBSAOBCForceKernel
(
name
,
platform
),
cl
(
cl
),
hasCreatedKernels
(
false
),
params
(
NULL
),
bornSum
(
NULL
),
longBornSum
(
NULL
),
bornRadii
(
NULL
),
bornForce
(
NULL
),
hasCreatedKernels
(
false
)
{
longBornForce
(
NULL
),
obcChain
(
NULL
)
{
}
}
~
OpenCLCalcGBSAOBCForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -804,13 +794,13 @@ private:
...
@@ -804,13 +794,13 @@ private:
int
maxTiles
;
int
maxTiles
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
ForceInfo
*
info
;
OpenCLArray
*
params
;
OpenCLArray
params
;
OpenCLArray
*
bornSum
;
OpenCLArray
bornSum
;
OpenCLArray
*
longBornSum
;
OpenCLArray
longBornSum
;
OpenCLArray
*
bornRadii
;
OpenCLArray
bornRadii
;
OpenCLArray
*
bornForce
;
OpenCLArray
bornForce
;
OpenCLArray
*
longBornForce
;
OpenCLArray
longBornForce
;
OpenCLArray
*
obcChain
;
OpenCLArray
obcChain
;
cl
::
Kernel
computeBornSumKernel
;
cl
::
Kernel
computeBornSumKernel
;
cl
::
Kernel
reduceBornSumKernel
;
cl
::
Kernel
reduceBornSumKernel
;
cl
::
Kernel
force1Kernel
;
cl
::
Kernel
force1Kernel
;
...
@@ -823,8 +813,7 @@ private:
...
@@ -823,8 +813,7 @@ private:
class
OpenCLCalcCustomGBForceKernel
:
public
CalcCustomGBForceKernel
{
class
OpenCLCalcCustomGBForceKernel
:
public
CalcCustomGBForceKernel
{
public:
public:
OpenCLCalcCustomGBForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomGBForceKernel
(
name
,
platform
),
OpenCLCalcCustomGBForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomGBForceKernel
(
name
,
platform
),
hasInitializedKernels
(
false
),
cl
(
cl
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
energyDerivChain
(
NULL
),
longEnergyDerivs
(
NULL
),
globals
(
NULL
),
hasInitializedKernels
(
false
),
cl
(
cl
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
energyDerivChain
(
NULL
),
system
(
system
)
{
valueBuffers
(
NULL
),
longValueBuffers
(
NULL
),
system
(
system
)
{
}
}
~
OpenCLCalcCustomGBForceKernel
();
~
OpenCLCalcCustomGBForceKernel
();
/**
/**
...
@@ -862,14 +851,14 @@ private:
...
@@ -862,14 +851,14 @@ private:
OpenCLParameterSet
*
energyDerivs
;
OpenCLParameterSet
*
energyDerivs
;
OpenCLParameterSet
*
energyDerivChain
;
OpenCLParameterSet
*
energyDerivChain
;
std
::
vector
<
OpenCLParameterSet
*>
dValuedParam
;
std
::
vector
<
OpenCLParameterSet
*>
dValuedParam
;
std
::
vector
<
OpenCLArray
*
>
dValue0dParam
;
std
::
vector
<
OpenCLArray
>
dValue0dParam
;
OpenCLArray
*
longEnergyDerivs
;
OpenCLArray
longEnergyDerivs
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
OpenCLArray
*
valueBuffers
;
OpenCLArray
valueBuffers
;
OpenCLArray
*
longValueBuffers
;
OpenCLArray
longValueBuffers
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
std
::
vector
<
bool
>
pairValueUsesParam
,
pairEnergyUsesParam
,
pairEnergyUsesValue
;
std
::
vector
<
bool
>
pairValueUsesParam
,
pairEnergyUsesParam
,
pairEnergyUsesValue
;
const
System
&
system
;
const
System
&
system
;
cl
::
Kernel
pairValueKernel
,
perParticleValueKernel
,
pairEnergyKernel
,
perParticleEnergyKernel
,
gradientChainRuleKernel
;
cl
::
Kernel
pairValueKernel
,
perParticleValueKernel
,
pairEnergyKernel
,
perParticleEnergyKernel
,
gradientChainRuleKernel
;
...
@@ -883,7 +872,7 @@ private:
...
@@ -883,7 +872,7 @@ private:
class
OpenCLCalcCustomExternalForceKernel
:
public
CalcCustomExternalForceKernel
{
class
OpenCLCalcCustomExternalForceKernel
:
public
CalcCustomExternalForceKernel
{
public:
public:
OpenCLCalcCustomExternalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomExternalForceKernel
(
name
,
platform
),
OpenCLCalcCustomExternalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomExternalForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cl
(
cl
),
system
(
system
),
params
(
NULL
)
{
}
}
~
OpenCLCalcCustomExternalForceKernel
();
~
OpenCLCalcCustomExternalForceKernel
();
/**
/**
...
@@ -917,7 +906,7 @@ private:
...
@@ -917,7 +906,7 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
};
};
...
@@ -928,8 +917,7 @@ private:
...
@@ -928,8 +917,7 @@ private:
class
OpenCLCalcCustomHbondForceKernel
:
public
CalcCustomHbondForceKernel
{
class
OpenCLCalcCustomHbondForceKernel
:
public
CalcCustomHbondForceKernel
{
public:
public:
OpenCLCalcCustomHbondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomHbondForceKernel
(
name
,
platform
),
OpenCLCalcCustomHbondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomHbondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
donorParams
(
NULL
),
acceptorParams
(
NULL
),
donors
(
NULL
),
acceptors
(
NULL
),
hasInitializedKernel
(
false
),
cl
(
cl
),
donorParams
(
NULL
),
acceptorParams
(
NULL
),
system
(
system
)
{
donorBufferIndices
(
NULL
),
acceptorBufferIndices
(
NULL
),
globals
(
NULL
),
donorExclusions
(
NULL
),
acceptorExclusions
(
NULL
),
system
(
system
)
{
}
}
~
OpenCLCalcCustomHbondForceKernel
();
~
OpenCLCalcCustomHbondForceKernel
();
/**
/**
...
@@ -963,16 +951,16 @@ private:
...
@@ -963,16 +951,16 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
OpenCLParameterSet
*
donorParams
;
OpenCLParameterSet
*
donorParams
;
OpenCLParameterSet
*
acceptorParams
;
OpenCLParameterSet
*
acceptorParams
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
OpenCLArray
*
donors
;
OpenCLArray
donors
;
OpenCLArray
*
acceptors
;
OpenCLArray
acceptors
;
OpenCLArray
*
donorBufferIndices
;
OpenCLArray
donorBufferIndices
;
OpenCLArray
*
acceptorBufferIndices
;
OpenCLArray
acceptorBufferIndices
;
OpenCLArray
*
donorExclusions
;
OpenCLArray
donorExclusions
;
OpenCLArray
*
acceptorExclusions
;
OpenCLArray
acceptorExclusions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
const
System
&
system
;
const
System
&
system
;
cl
::
Kernel
donorKernel
,
acceptorKernel
;
cl
::
Kernel
donorKernel
,
acceptorKernel
;
};
};
...
@@ -983,7 +971,7 @@ private:
...
@@ -983,7 +971,7 @@ private:
class
OpenCLCalcCustomCentroidBondForceKernel
:
public
CalcCustomCentroidBondForceKernel
{
class
OpenCLCalcCustomCentroidBondForceKernel
:
public
CalcCustomCentroidBondForceKernel
{
public:
public:
OpenCLCalcCustomCentroidBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomCentroidBondForceKernel
(
name
,
platform
),
OpenCLCalcCustomCentroidBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomCentroidBondForceKernel
(
name
,
platform
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
groupParticles
(
NULL
),
groupWeights
(
NULL
),
groupOffsets
(
NULL
),
groupForces
(
NULL
),
bondGroups
(
NULL
),
centerPositions
(
NULL
),
system
(
system
)
{
cl
(
cl
),
params
(
NULL
),
system
(
system
)
{
}
}
~
OpenCLCalcCustomCentroidBondForceKernel
();
~
OpenCLCalcCustomCentroidBondForceKernel
();
/**
/**
...
@@ -1017,16 +1005,16 @@ private:
...
@@ -1017,16 +1005,16 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
OpenCLArray
*
groupParticles
;
OpenCLArray
groupParticles
;
OpenCLArray
*
groupWeights
;
OpenCLArray
groupWeights
;
OpenCLArray
*
groupOffsets
;
OpenCLArray
groupOffsets
;
OpenCLArray
*
groupForces
;
OpenCLArray
groupForces
;
OpenCLArray
*
bondGroups
;
OpenCLArray
bondGroups
;
OpenCLArray
*
centerPositions
;
OpenCLArray
centerPositions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
cl
::
Kernel
computeCentersKernel
,
groupForcesKernel
,
applyForcesKernel
;
cl
::
Kernel
computeCentersKernel
,
groupForcesKernel
,
applyForcesKernel
;
const
System
&
system
;
const
System
&
system
;
};
};
...
@@ -1037,7 +1025,7 @@ private:
...
@@ -1037,7 +1025,7 @@ private:
class
OpenCLCalcCustomCompoundBondForceKernel
:
public
CalcCustomCompoundBondForceKernel
{
class
OpenCLCalcCustomCompoundBondForceKernel
:
public
CalcCustomCompoundBondForceKernel
{
public:
public:
OpenCLCalcCustomCompoundBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomCompoundBondForceKernel
(
name
,
platform
),
OpenCLCalcCustomCompoundBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomCompoundBondForceKernel
(
name
,
platform
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
system
(
system
)
{
cl
(
cl
),
params
(
NULL
),
system
(
system
)
{
}
}
~
OpenCLCalcCustomCompoundBondForceKernel
();
~
OpenCLCalcCustomCompoundBondForceKernel
();
/**
/**
...
@@ -1070,10 +1058,10 @@ private:
...
@@ -1070,10 +1058,10 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
ForceInfo
*
info
;
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
const
System
&
system
;
const
System
&
system
;
};
};
...
@@ -1083,9 +1071,7 @@ private:
...
@@ -1083,9 +1071,7 @@ private:
class
OpenCLCalcCustomManyParticleForceKernel
:
public
CalcCustomManyParticleForceKernel
{
class
OpenCLCalcCustomManyParticleForceKernel
:
public
CalcCustomManyParticleForceKernel
{
public:
public:
OpenCLCalcCustomManyParticleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomManyParticleForceKernel
(
name
,
platform
),
OpenCLCalcCustomManyParticleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomManyParticleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
particleTypes
(
NULL
),
orderIndex
(
NULL
),
particleOrder
(
NULL
),
exclusions
(
NULL
),
hasInitializedKernel
(
false
),
cl
(
cl
),
params
(
NULL
),
system
(
system
)
{
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
neighborPairs
(
NULL
),
numNeighborPairs
(
NULL
),
neighborStartIndex
(
NULL
),
numNeighborsForAtom
(
NULL
),
neighbors
(
NULL
),
system
(
system
)
{
}
}
~
OpenCLCalcCustomManyParticleForceKernel
();
~
OpenCLCalcCustomManyParticleForceKernel
();
/**
/**
...
@@ -1120,22 +1106,22 @@ private:
...
@@ -1120,22 +1106,22 @@ private:
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
globals
;
OpenCLArray
*
particleTypes
;
OpenCLArray
particleTypes
;
OpenCLArray
*
orderIndex
;
OpenCLArray
orderIndex
;
OpenCLArray
*
particleOrder
;
OpenCLArray
particleOrder
;
OpenCLArray
*
exclusions
;
OpenCLArray
exclusions
;
OpenCLArray
*
exclusionStartIndex
;
OpenCLArray
exclusionStartIndex
;
OpenCLArray
*
blockCenter
;
OpenCLArray
blockCenter
;
OpenCLArray
*
blockBoundingBox
;
OpenCLArray
blockBoundingBox
;
OpenCLArray
*
neighborPairs
;
OpenCLArray
neighborPairs
;
OpenCLArray
*
numNeighborPairs
;
OpenCLArray
numNeighborPairs
;
OpenCLArray
*
neighborStartIndex
;
OpenCLArray
neighborStartIndex
;
OpenCLArray
*
numNeighborsForAtom
;
OpenCLArray
numNeighborsForAtom
;
OpenCLArray
*
neighbors
;
OpenCLArray
neighbors
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*
>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
const
System
&
system
;
const
System
&
system
;
cl
::
Kernel
forceKernel
,
blockBoundsKernel
,
neighborsKernel
,
startIndicesKernel
,
copyPairsKernel
;
cl
::
Kernel
forceKernel
,
blockBoundsKernel
,
neighborsKernel
,
startIndicesKernel
,
copyPairsKernel
;
};
};
...
...
platforms/opencl/include/OpenCLSort.h
View file @
d59b0373
...
@@ -28,6 +28,7 @@
...
@@ -28,6 +28,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "OpenCLArray.h"
#include "OpenCLArray.h"
#include "OpenCLContext.h"
#include "windowsExportOpenCL.h"
#include "windowsExportOpenCL.h"
namespace
OpenMM
{
namespace
OpenMM
{
...
...
platforms/opencl/src/OpenCLArray.cpp
View file @
d59b0373
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2012 Stanford University and the Authors.
*
* Portions copyright (c) 2012
-2018
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -25,14 +25,38 @@
...
@@ -25,14 +25,38 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "OpenCLArray.h"
#include "OpenCLArray.h"
#include "OpenCLContext.h"
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include <vector>
#include <vector>
using
namespace
OpenMM
;
using
namespace
OpenMM
;
OpenCLArray
::
OpenCLArray
(
OpenCLContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
,
cl_int
flags
)
:
OpenCLArray
::
OpenCLArray
()
:
buffer
(
NULL
),
ownsBuffer
(
false
)
{
context
(
context
),
size
(
size
),
elementSize
(
elementSize
),
name
(
name
),
ownsBuffer
(
true
)
{
}
OpenCLArray
::
OpenCLArray
(
OpenCLContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
,
cl_int
flags
)
:
buffer
(
NULL
)
{
initialize
(
context
,
size
,
elementSize
,
name
,
flags
);
}
OpenCLArray
::
OpenCLArray
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
:
buffer
(
NULL
)
{
initialize
(
context
,
buffer
,
size
,
elementSize
,
name
);
}
OpenCLArray
::~
OpenCLArray
()
{
if
(
buffer
!=
NULL
&&
ownsBuffer
)
delete
buffer
;
}
void
OpenCLArray
::
initialize
(
OpenCLContext
&
context
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
,
cl_int
flags
)
{
if
(
buffer
!=
NULL
)
throw
OpenMMException
(
"OpenCLArray has already been initialized"
);
this
->
context
=
&
context
;
this
->
size
=
size
;
this
->
elementSize
=
elementSize
;
this
->
name
=
name
;
this
->
flags
=
flags
;
ownsBuffer
=
true
;
try
{
try
{
buffer
=
new
cl
::
Buffer
(
context
.
getContext
(),
flags
,
size
*
elementSize
);
buffer
=
new
cl
::
Buffer
(
context
.
getContext
(),
flags
,
size
*
elementSize
);
}
}
...
@@ -43,18 +67,32 @@ OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, cons
...
@@ -43,18 +67,32 @@ OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, cons
}
}
}
}
OpenCLArray
::
OpenCLArray
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
:
void
OpenCLArray
::
initialize
(
OpenCLContext
&
context
,
cl
::
Buffer
*
buffer
,
int
size
,
int
elementSize
,
const
std
::
string
&
name
)
{
context
(
context
),
buffer
(
buffer
),
size
(
size
),
elementSize
(
elementSize
),
name
(
name
),
ownsBuffer
(
false
)
{
if
(
this
->
buffer
!=
NULL
)
throw
OpenMMException
(
"OpenCLArray has already been initialized"
);
this
->
context
=
&
context
;
this
->
buffer
=
buffer
;
this
->
size
=
size
;
this
->
elementSize
=
elementSize
;
this
->
name
=
name
;
ownsBuffer
=
false
;
}
}
OpenCLArray
::~
OpenCLArray
()
{
void
OpenCLArray
::
resize
(
int
size
)
{
if
(
ownsBuffer
)
if
(
buffer
==
NULL
)
delete
buffer
;
throw
OpenMMException
(
"OpenCLArray has not been initialized"
);
if
(
!
ownsBuffer
)
throw
OpenMMException
(
"Cannot resize an array that does not own its storage"
);
delete
buffer
;
buffer
=
NULL
;
initialize
(
*
context
,
size
,
elementSize
,
name
,
flags
);
}
}
void
OpenCLArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
void
OpenCLArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
if
(
buffer
==
NULL
)
throw
OpenMMException
(
"OpenCLArray has not been initialized"
);
try
{
try
{
context
.
getQueue
().
enqueueWriteBuffer
(
*
buffer
,
blocking
?
CL_TRUE
:
CL_FALSE
,
0
,
size
*
elementSize
,
data
);
context
->
getQueue
().
enqueueWriteBuffer
(
*
buffer
,
blocking
?
CL_TRUE
:
CL_FALSE
,
0
,
size
*
elementSize
,
data
);
}
}
catch
(
cl
::
Error
err
)
{
catch
(
cl
::
Error
err
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
...
@@ -64,8 +102,10 @@ void OpenCLArray::upload(const void* data, bool blocking) {
...
@@ -64,8 +102,10 @@ void OpenCLArray::upload(const void* data, bool blocking) {
}
}
void
OpenCLArray
::
download
(
void
*
data
,
bool
blocking
)
const
{
void
OpenCLArray
::
download
(
void
*
data
,
bool
blocking
)
const
{
if
(
buffer
==
NULL
)
throw
OpenMMException
(
"OpenCLArray has not been initialized"
);
try
{
try
{
context
.
getQueue
().
enqueueReadBuffer
(
*
buffer
,
blocking
?
CL_TRUE
:
CL_FALSE
,
0
,
size
*
elementSize
,
data
);
context
->
getQueue
().
enqueueReadBuffer
(
*
buffer
,
blocking
?
CL_TRUE
:
CL_FALSE
,
0
,
size
*
elementSize
,
data
);
}
}
catch
(
cl
::
Error
err
)
{
catch
(
cl
::
Error
err
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
...
@@ -75,10 +115,12 @@ void OpenCLArray::download(void* data, bool blocking) const {
...
@@ -75,10 +115,12 @@ void OpenCLArray::download(void* data, bool blocking) const {
}
}
void
OpenCLArray
::
copyTo
(
OpenCLArray
&
dest
)
const
{
void
OpenCLArray
::
copyTo
(
OpenCLArray
&
dest
)
const
{
if
(
buffer
==
NULL
)
throw
OpenMMException
(
"OpenCLArray has not been initialized"
);
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
try
{
try
{
context
.
getQueue
().
enqueueCopyBuffer
(
*
buffer
,
dest
.
getDeviceBuffer
(),
0
,
0
,
size
*
elementSize
);
context
->
getQueue
().
enqueueCopyBuffer
(
*
buffer
,
dest
.
getDeviceBuffer
(),
0
,
0
,
size
*
elementSize
);
}
}
catch
(
cl
::
Error
err
)
{
catch
(
cl
::
Error
err
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
d59b0373
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -68,9 +68,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
...
@@ -68,9 +68,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
}
}
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
,
OpenCLContext
*
originalContext
)
:
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
,
OpenCLContext
*
originalContext
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
energySum
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useDoublePrecision
=
false
;
useMixedPrecision
=
false
;
useMixedPrecision
=
false
;
...
@@ -275,23 +274,23 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -275,23 +274,23 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
numAtomBlocks
=
(
paddedNumAtoms
+
(
TileSize
-
1
))
/
TileSize
;
numAtomBlocks
=
(
paddedNumAtoms
+
(
TileSize
-
1
))
/
TileSize
;
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
device
.
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
device
.
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
posq
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posq
.
initializ
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
velm
.
initializ
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
compilationDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
compilationDefines
[
"convert_real4"
]
=
"convert_double4"
;
compilationDefines
[
"convert_real4"
]
=
"convert_double4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_double4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_double4"
;
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
posq
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posq
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posqCorrection
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posqCorrection
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
velm
.
initializ
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"USE_MIXED_PRECISION"
]
=
"1"
;
compilationDefines
[
"USE_MIXED_PRECISION"
]
=
"1"
;
compilationDefines
[
"convert_real4"
]
=
"convert_float4"
;
compilationDefines
[
"convert_real4"
]
=
"convert_float4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_double4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_double4"
;
}
}
else
{
else
{
posq
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posq
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
velm
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"convert_real4"
]
=
"convert_float4"
;
compilationDefines
[
"convert_real4"
]
=
"convert_float4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_float4"
;
compilationDefines
[
"convert_mixed4"
]
=
"convert_float4"
;
}
}
...
@@ -429,28 +428,6 @@ OpenCLContext::~OpenCLContext() {
...
@@ -429,28 +428,6 @@ OpenCLContext::~OpenCLContext() {
delete
computation
;
delete
computation
;
if
(
pinnedBuffer
!=
NULL
)
if
(
pinnedBuffer
!=
NULL
)
delete
pinnedBuffer
;
delete
pinnedBuffer
;
if
(
posq
!=
NULL
)
delete
posq
;
if
(
posqCorrection
!=
NULL
)
delete
posqCorrection
;
if
(
velm
!=
NULL
)
delete
velm
;
if
(
force
!=
NULL
)
delete
force
;
if
(
forceBuffers
!=
NULL
)
delete
forceBuffers
;
if
(
longForceBuffer
!=
NULL
)
delete
longForceBuffer
;
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
if
(
energySum
!=
NULL
)
delete
energySum
;
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
delete
atomIndexDevice
;
if
(
chargeBuffer
!=
NULL
)
delete
chargeBuffer
;
if
(
integration
!=
NULL
)
if
(
integration
!=
NULL
)
delete
integration
;
delete
integration
;
if
(
expression
!=
NULL
)
if
(
expression
!=
NULL
)
...
@@ -471,42 +448,42 @@ void OpenCLContext::initialize() {
...
@@ -471,42 +448,42 @@ void OpenCLContext::initialize() {
numForceBuffers
=
std
::
max
(
numForceBuffers
,
force
->
getRequiredForceBuffers
());
numForceBuffers
=
std
::
max
(
numForceBuffers
,
force
->
getRequiredForceBuffers
());
int
energyBufferSize
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
int
energyBufferSize
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
forceBuffers
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
.
initializ
e
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
creat
e
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
.
initializ
e
<
mm_double4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
.
initializ
e
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initializ
e
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
forceBuffers
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
.
initializ
e
<
mm_float4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
.
initializ
e
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initializ
e
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
}
}
else
{
else
{
forceBuffers
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
.
initializ
e
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
creat
e
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
.
initializ
e
<
mm_float4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
creat
e
<
cl_float
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
.
initializ
e
<
cl_float
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
creat
e
<
cl_float
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initializ
e
<
cl_float
>
(
*
this
,
1
,
"energySum"
);
}
}
if
(
supports64BitGlobalAtomics
)
{
if
(
supports64BitGlobalAtomics
)
{
longForceBuffer
=
OpenCLArray
::
creat
e
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
longForceBuffer
.
initializ
e
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
longForceBuffer
->
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
longForceBuffer
.
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
->
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
.
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl_int
>
(
2
,
paddedNumAtoms
);
reduceForcesKernel
.
setArg
<
cl_int
>
(
2
,
paddedNumAtoms
);
reduceForcesKernel
.
setArg
<
cl_int
>
(
3
,
numForceBuffers
);
reduceForcesKernel
.
setArg
<
cl_int
>
(
3
,
numForceBuffers
);
addAutoclearBuffer
(
*
longForceBuffer
);
addAutoclearBuffer
(
longForceBuffer
);
}
}
addAutoclearBuffer
(
*
forceBuffers
);
addAutoclearBuffer
(
forceBuffers
);
addAutoclearBuffer
(
*
energyBuffer
);
addAutoclearBuffer
(
energyBuffer
);
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
if
(
numEnergyParamDerivs
>
0
)
{
if
(
numEnergyParamDerivs
>
0
)
{
if
(
useDoublePrecision
||
useMixedPrecision
)
if
(
useDoublePrecision
||
useMixedPrecision
)
energyParamDerivBuffer
=
OpenCLArray
::
creat
e
<
cl_double
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
energyParamDerivBuffer
.
initializ
e
<
cl_double
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
else
else
energyParamDerivBuffer
=
OpenCLArray
::
creat
e
<
cl_float
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
energyParamDerivBuffer
.
initializ
e
<
cl_float
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
addAutoclearBuffer
(
*
energyParamDerivBuffer
);
addAutoclearBuffer
(
energyParamDerivBuffer
);
}
}
int
bufferBytes
=
max
(
velm
->
getSize
()
*
velm
->
getElementSize
(),
energyBufferSize
*
energyBuffer
->
getElementSize
());
int
bufferBytes
=
max
(
velm
.
getSize
()
*
velm
.
getElementSize
(),
energyBufferSize
*
energyBuffer
.
getElementSize
());
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedMemory
=
currentQueue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedMemory
=
currentQueue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
...
@@ -516,12 +493,12 @@ void OpenCLContext::initialize() {
...
@@ -516,12 +493,12 @@ void OpenCLContext::initialize() {
else
else
((
mm_float4
*
)
pinnedMemory
)[
i
]
=
mm_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
mass
==
0.0
?
0.0
f
:
(
cl_float
)
(
1.0
/
mass
));
((
mm_float4
*
)
pinnedMemory
)[
i
]
=
mm_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
mass
==
0.0
?
0.0
f
:
(
cl_float
)
(
1.0
/
mass
));
}
}
velm
->
upload
(
pinnedMemory
);
velm
.
upload
(
pinnedMemory
);
atomIndexDevice
=
OpenCLArray
::
creat
e
<
cl_int
>
(
*
this
,
paddedNumAtoms
,
"atomIndexDevice"
);
atomIndexDevice
.
initializ
e
<
cl_int
>
(
*
this
,
paddedNumAtoms
,
"atomIndexDevice"
);
atomIndex
.
resize
(
paddedNumAtoms
);
atomIndex
.
resize
(
paddedNumAtoms
);
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
atomIndex
[
i
]
=
i
;
atomIndex
[
i
]
=
i
;
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
findMoleculeGroups
();
findMoleculeGroups
();
nonbonded
->
initialize
(
system
);
nonbonded
->
initialize
(
system
);
}
}
...
@@ -756,7 +733,7 @@ void OpenCLContext::reduceForces() {
...
@@ -756,7 +733,7 @@ void OpenCLContext::reduceForces() {
if
(
supports64BitGlobalAtomics
)
if
(
supports64BitGlobalAtomics
)
executeKernel
(
reduceForcesKernel
,
paddedNumAtoms
,
128
);
executeKernel
(
reduceForcesKernel
,
paddedNumAtoms
,
128
);
else
else
reduceBuffer
(
*
forceBuffers
,
numForceBuffers
);
reduceBuffer
(
forceBuffers
,
numForceBuffers
);
}
}
void
OpenCLContext
::
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
)
{
void
OpenCLContext
::
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
)
{
...
@@ -771,42 +748,42 @@ double OpenCLContext::reduceEnergy() {
...
@@ -771,42 +748,42 @@ double OpenCLContext::reduceEnergy() {
int
workGroupSize
=
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
();
int
workGroupSize
=
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
();
if
(
workGroupSize
>
512
)
if
(
workGroupSize
>
512
)
workGroupSize
=
512
;
workGroupSize
=
512
;
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
energyBuffer
->
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
energyBuffer
.
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
energySum
->
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
energySum
.
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
2
,
energyBuffer
->
getSize
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
2
,
energyBuffer
.
getSize
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
3
,
workGroupSize
);
reduceEnergyKernel
.
setArg
<
cl_int
>
(
3
,
workGroupSize
);
reduceEnergyKernel
.
setArg
(
4
,
workGroupSize
*
energyBuffer
->
getElementSize
(),
NULL
);
reduceEnergyKernel
.
setArg
(
4
,
workGroupSize
*
energyBuffer
.
getElementSize
(),
NULL
);
executeKernel
(
reduceEnergyKernel
,
workGroupSize
,
workGroupSize
);
executeKernel
(
reduceEnergyKernel
,
workGroupSize
,
workGroupSize
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
{
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
{
double
energy
;
double
energy
;
energySum
->
download
(
&
energy
);
energySum
.
download
(
&
energy
);
return
energy
;
return
energy
;
}
}
else
{
else
{
float
energy
;
float
energy
;
energySum
->
download
(
&
energy
);
energySum
.
download
(
&
energy
);
return
energy
;
return
energy
;
}
}
}
}
void
OpenCLContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
void
OpenCLContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
if
(
chargeBuffer
==
NULL
)
if
(
!
chargeBuffer
.
isInitialized
()
)
chargeBuffer
=
new
OpenCLArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
chargeBuffer
.
initialize
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
if
(
getUseDoublePrecision
())
{
if
(
getUseDoublePrecision
())
{
double
*
c
=
(
double
*
)
getPinnedBuffer
();
double
*
c
=
(
double
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
charges
[
i
];
c
[
i
]
=
charges
[
i
];
chargeBuffer
->
upload
(
c
);
chargeBuffer
.
upload
(
c
);
}
}
else
{
else
{
float
*
c
=
(
float
*
)
getPinnedBuffer
();
float
*
c
=
(
float
*
)
getPinnedBuffer
();
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
charges
.
size
();
i
++
)
c
[
i
]
=
(
float
)
charges
[
i
];
c
[
i
]
=
(
float
)
charges
[
i
];
chargeBuffer
->
upload
(
c
);
chargeBuffer
.
upload
(
c
);
}
}
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
chargeBuffer
->
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
chargeBuffer
.
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
posq
->
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
posq
.
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
atomIndexDevice
->
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
atomIndexDevice
.
getDeviceBuffer
());
setChargesKernel
.
setArg
<
cl_int
>
(
3
,
numAtoms
);
setChargesKernel
.
setArg
<
cl_int
>
(
3
,
numAtoms
);
executeKernel
(
setChargesKernel
,
numAtoms
);
executeKernel
(
setChargesKernel
,
numAtoms
);
}
}
...
@@ -1069,16 +1046,16 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
...
@@ -1069,16 +1046,16 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
vector
<
mm_double4
>
newPosq
(
paddedNumAtoms
,
mm_double4
(
0
,
0
,
0
,
0
));
vector
<
mm_double4
>
newPosq
(
paddedNumAtoms
,
mm_double4
(
0
,
0
,
0
,
0
));
vector
<
mm_double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_double4
>
newVelm
(
paddedNumAtoms
,
mm_double4
(
0
,
0
,
0
,
0
));
vector
<
mm_double4
>
newVelm
(
paddedNumAtoms
,
mm_double4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
posq
.
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
velm
.
upload
(
newVelm
);
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
...
@@ -1087,8 +1064,8 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
...
@@ -1087,8 +1064,8 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
vector
<
mm_float4
>
newPosqCorrection
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_float4
>
newPosqCorrection
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_double4
>
newVelm
(
paddedNumAtoms
,
mm_double4
(
0
,
0
,
0
,
0
));
vector
<
mm_double4
>
newVelm
(
paddedNumAtoms
,
mm_double4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
posq
.
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
...
@@ -1096,31 +1073,31 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
...
@@ -1096,31 +1073,31 @@ bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
newVelm
[
index
]
=
oldVelm
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
posqCorrection
->
upload
(
newPosqCorrection
);
posqCorrection
.
upload
(
newPosqCorrection
);
velm
->
upload
(
newVelm
);
velm
.
upload
(
newVelm
);
}
}
else
{
else
{
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
mm_float4
>
newPosq
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_float4
>
newPosq
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
mm_float4
>
newVelm
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
vector
<
mm_float4
>
newVelm
(
paddedNumAtoms
,
mm_float4
(
0
,
0
,
0
,
0
));
posq
->
download
(
oldPosq
);
posq
.
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
.
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
velm
.
upload
(
newVelm
);
}
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
atomIndex
[
i
]
=
i
;
atomIndex
[
i
]
=
i
;
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
}
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
findMoleculeGroups
();
findMoleculeGroups
();
for
(
auto
listener
:
reorderListeners
)
for
(
auto
listener
:
reorderListeners
)
listener
->
execute
();
listener
->
execute
();
...
@@ -1152,10 +1129,10 @@ void OpenCLContext::reorderAtomsImpl() {
...
@@ -1152,10 +1129,10 @@ void OpenCLContext::reorderAtomsImpl() {
vector
<
Real4
>
oldPosq
(
paddedNumAtoms
);
vector
<
Real4
>
oldPosq
(
paddedNumAtoms
);
vector
<
Real4
>
oldPosqCorrection
(
paddedNumAtoms
);
vector
<
Real4
>
oldPosqCorrection
(
paddedNumAtoms
);
vector
<
Mixed4
>
oldVelm
(
paddedNumAtoms
);
vector
<
Mixed4
>
oldVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
posq
.
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
.
download
(
oldVelm
);
if
(
useMixedPrecision
)
if
(
useMixedPrecision
)
posqCorrection
->
download
(
oldPosqCorrection
);
posqCorrection
.
download
(
oldPosqCorrection
);
Real
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
Real
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
Real
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
Real
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
Real
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
Real
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
...
@@ -1299,11 +1276,11 @@ void OpenCLContext::reorderAtomsImpl() {
...
@@ -1299,11 +1276,11 @@ void OpenCLContext::reorderAtomsImpl() {
atomIndex
[
i
]
=
originalIndex
[
i
];
atomIndex
[
i
]
=
originalIndex
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
posq
.
upload
(
newPosq
);
if
(
useMixedPrecision
)
if
(
useMixedPrecision
)
posqCorrection
->
upload
(
newPosqCorrection
);
posqCorrection
.
upload
(
newPosqCorrection
);
velm
->
upload
(
newVelm
);
velm
.
upload
(
newVelm
);
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
.
upload
(
atomIndex
);
for
(
auto
listener
:
reorderListeners
)
for
(
auto
listener
:
reorderListeners
)
listener
->
execute
();
listener
->
execute
();
}
}
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
d59b0373
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -532,11 +532,6 @@ private:
...
@@ -532,11 +532,6 @@ private:
const HarmonicBondForce& force;
const HarmonicBondForce& force;
};
};
OpenCLCalcHarmonicBondForceKernel::~OpenCLCalcHarmonicBondForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
...
@@ -545,18 +540,18 @@ void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const H
...
@@ -545,18 +540,18 @@ void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const H
if (numBonds == 0)
if (numBonds == 0)
return;
return;
vector<vector<int> > atoms(numBonds, vector<int>(2));
vector<vector<int> > atoms(numBonds, vector<int>(2));
params
= OpenCLArray::creat
e<mm_float2>(cl, numBonds, "bondParams");
params
.initializ
e<mm_float2>(cl, numBonds, "bondParams");
vector<mm_float2> paramVector(numBonds);
vector<mm_float2> paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
for (int i = 0; i < numBonds; i++) {
double length, k;
double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicBondForce;
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicBondForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
->
getDeviceBuffer(), "float2");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
.
getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::bondForce, replacements), force.getForceGroup());
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::bondForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cl.addForce(info);
cl.addForce(info);
...
@@ -584,7 +579,7 @@ void OpenCLCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& con
...
@@ -584,7 +579,7 @@ void OpenCLCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& con
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -623,8 +618,6 @@ private:
...
@@ -623,8 +618,6 @@ private:
OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
}
}
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
...
@@ -671,9 +664,9 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
...
@@ -671,9 +664,9 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
variables[name] = "bondParams"+params->getParameterSuffix(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customBondGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customBondGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
...
@@ -702,7 +695,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
...
@@ -702,7 +695,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
}
}
double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
@@ -711,7 +704,7 @@ double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool inclu
...
@@ -711,7 +704,7 @@ double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool inclu
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -770,11 +763,6 @@ private:
...
@@ -770,11 +763,6 @@ private:
const HarmonicAngleForce& force;
const HarmonicAngleForce& force;
};
};
OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
...
@@ -783,7 +771,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
...
@@ -783,7 +771,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
if (numAngles == 0)
if (numAngles == 0)
return;
return;
vector<vector<int> > atoms(numAngles, vector<int>(3));
vector<vector<int> > atoms(numAngles, vector<int>(3));
params
= OpenCLArray::creat
e<mm_float2>(cl, numAngles, "angleParams");
params
.initializ
e<mm_float2>(cl, numAngles, "angleParams");
vector<mm_float2> paramVector(numAngles);
vector<mm_float2> paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
for (int i = 0; i < numAngles; i++) {
double angle, k;
double angle, k;
...
@@ -791,11 +779,11 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
...
@@ -791,11 +779,11 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicAngleForce;
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
->
getDeviceBuffer(), "float2");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
.
getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::angleForce, replacements), force.getForceGroup());
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::angleForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cl.addForce(info);
cl.addForce(info);
...
@@ -823,7 +811,7 @@ void OpenCLCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& co
...
@@ -823,7 +811,7 @@ void OpenCLCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& co
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -863,8 +851,6 @@ private:
...
@@ -863,8 +851,6 @@ private:
OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() {
OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() {
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
}
}
void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
...
@@ -911,9 +897,9 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
...
@@ -911,9 +897,9 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
variables[name] = "angleParams"+params->getParameterSuffix(i);
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customAngleGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customAngleGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
...
@@ -942,7 +928,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
...
@@ -942,7 +928,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
}
}
double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
@@ -951,7 +937,7 @@ double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool incl
...
@@ -951,7 +937,7 @@ double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -1011,11 +997,6 @@ private:
...
@@ -1011,11 +997,6 @@ private:
const PeriodicTorsionForce& force;
const PeriodicTorsionForce& force;
};
};
OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
...
@@ -1024,7 +1005,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
...
@@ -1024,7 +1005,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
if (numTorsions == 0)
if (numTorsions == 0)
return;
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params
= OpenCLArray::creat
e<mm_float4>(cl, numTorsions, "periodicTorsionParams");
params
.initializ
e<mm_float4>(cl, numTorsions, "periodicTorsionParams");
vector<mm_float4> paramVector(numTorsions);
vector<mm_float4> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
for (int i = 0; i < numTorsions; i++) {
int periodicity;
int periodicity;
...
@@ -1032,11 +1013,11 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
...
@@ -1032,11 +1013,11 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::periodicTorsionForce;
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
->
getDeviceBuffer(), "float4");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
.
getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cl.addForce(info);
cl.addForce(info);
...
@@ -1064,7 +1045,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl&
...
@@ -1064,7 +1045,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl&
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -1099,11 +1080,6 @@ private:
...
@@ -1099,11 +1080,6 @@ private:
const RBTorsionForce& force;
const RBTorsionForce& force;
};
};
OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
...
@@ -1112,7 +1088,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
...
@@ -1112,7 +1088,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
if (numTorsions == 0)
if (numTorsions == 0)
return;
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params
= OpenCLArray::creat
e<mm_float8>(cl, numTorsions, "rbTorsionParams");
params
.initializ
e<mm_float8>(cl, numTorsions, "rbTorsionParams");
vector<mm_float8> paramVector(numTorsions);
vector<mm_float8> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
for (int i = 0; i < numTorsions; i++) {
double c0, c1, c2, c3, c4, c5;
double c0, c1, c2, c3, c4, c5;
...
@@ -1120,11 +1096,11 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
...
@@ -1120,11 +1096,11 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::rbTorsionForce;
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::rbTorsionForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
->
getDeviceBuffer(), "float8");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params
.
getDeviceBuffer(), "float8");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cl.addForce(info);
cl.addForce(info);
...
@@ -1152,7 +1128,7 @@ void OpenCLCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -1152,7 +1128,7 @@ void OpenCLCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& contex
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5);
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -1189,15 +1165,6 @@ private:
...
@@ -1189,15 +1165,6 @@ private:
const CMAPTorsionForce& force;
const CMAPTorsionForce& force;
};
};
OpenCLCalcCMAPTorsionForceKernel::~OpenCLCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
...
@@ -1228,17 +1195,17 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM
...
@@ -1228,17 +1195,17 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM
vector<cl_int> torsionMapsVec(numTorsions);
vector<cl_int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++)
for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients
= OpenCLArray::creat
e<mm_float4>(cl, coeffVec.size(), "cmapTorsionCoefficients");
coefficients
.initializ
e<mm_float4>(cl, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions
= OpenCLArray::creat
e<mm_int2>(cl, numMaps, "cmapTorsionMapPositions");
mapPositions
.initializ
e<mm_int2>(cl, numMaps, "cmapTorsionMapPositions");
torsionMaps
= OpenCLArray::creat
e<cl_int>(cl, numTorsions, "cmapTorsionMaps");
torsionMaps
.initializ
e<cl_int>(cl, numTorsions, "cmapTorsionMaps");
coefficients
->
upload(coeffVec);
coefficients
.
upload(coeffVec);
mapPositions
->
upload(mapPositionsVec);
mapPositions
.
upload(mapPositionsVec);
torsionMaps
->
upload(torsionMapsVec);
torsionMaps
.
upload(torsionMapsVec);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COEFF"] = cl.getBondedUtilities().addArgument(coefficients
->
getDeviceBuffer(), "float4");
replacements["COEFF"] = cl.getBondedUtilities().addArgument(coefficients
.
getDeviceBuffer(), "float4");
replacements["MAP_POS"] = cl.getBondedUtilities().addArgument(mapPositions
->
getDeviceBuffer(), "int2");
replacements["MAP_POS"] = cl.getBondedUtilities().addArgument(mapPositions
.
getDeviceBuffer(), "int2");
replacements["MAPS"] = cl.getBondedUtilities().addArgument(torsionMaps
->
getDeviceBuffer(), "int");
replacements["MAPS"] = cl.getBondedUtilities().addArgument(torsionMaps
.
getDeviceBuffer(), "int");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cl.addForce(info);
cl.addForce(info);
...
@@ -1254,9 +1221,9 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
...
@@ -1254,9 +1221,9 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
numTorsions = endIndex-startIndex;
if (mapPositions
->
getSize() != numMaps)
if (mapPositions
.
getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed");
throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps
->
getSize() != numTorsions)
if (torsionMaps
.
getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps.
// Update the maps.
...
@@ -1279,7 +1246,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
...
@@ -1279,7 +1246,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
coeffVec.push_back(mm_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
coeffVec.push_back(mm_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
}
}
coefficients
->
upload(coeffVec);
coefficients
.
upload(coeffVec);
// Update the indices.
// Update the indices.
...
@@ -1288,7 +1255,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
...
@@ -1288,7 +1255,7 @@ void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& cont
int index[8];
int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
}
}
torsionMaps
->
upload(torsionMapsVec);
torsionMaps
.
upload(torsionMapsVec);
}
}
class OpenCLCalcCustomTorsionForceKernel::ForceInfo : public OpenCLForceInfo {
class OpenCLCalcCustomTorsionForceKernel::ForceInfo : public OpenCLForceInfo {
...
@@ -1325,8 +1292,6 @@ private:
...
@@ -1325,8 +1292,6 @@ private:
OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() {
OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() {
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
}
}
void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
...
@@ -1373,9 +1338,9 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
...
@@ -1373,9 +1338,9 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
variables[name] = "torsionParams"+params->getParameterSuffix(i);
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customTorsionGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customTorsionGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
...
@@ -1404,7 +1369,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
...
@@ -1404,7 +1369,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
}
}
double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
@@ -1413,7 +1378,7 @@ double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool in
...
@@ -1413,7 +1378,7 @@ double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool in
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -1479,27 +1444,23 @@ private:
...
@@ -1479,27 +1444,23 @@ private:
class OpenCLCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
class OpenCLCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public:
public:
PmeIO(OpenCLContext& cl, cl::Kernel addForcesKernel) : cl(cl), addForcesKernel(addForcesKernel), forceTemp(NULL) {
PmeIO(OpenCLContext& cl, cl::Kernel addForcesKernel) : cl(cl), addForcesKernel(addForcesKernel) {
forceTemp = OpenCLArray::create<mm_float4>(cl, cl.getNumAtoms(), "PmeForce");
forceTemp.initialize<mm_float4>(cl, cl.getNumAtoms(), "PmeForce");
addForcesKernel.setArg<cl::Buffer>(0, forceTemp->getDeviceBuffer());
addForcesKernel.setArg<cl::Buffer>(0, forceTemp.getDeviceBuffer());
}
~PmeIO() {
if (forceTemp != NULL)
delete forceTemp;
}
}
float* getPosq() {
float* getPosq() {
cl.getPosq().download(posq);
cl.getPosq().download(posq);
return (float*) &posq[0];
return (float*) &posq[0];
}
}
void setForce(float* force) {
void setForce(float* force) {
forceTemp
->
upload(force);
forceTemp
.
upload(force);
addForcesKernel.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
addForcesKernel.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
cl.executeKernel(addForcesKernel, cl.getNumAtoms());
cl.executeKernel(addForcesKernel, cl.getNumAtoms());
}
}
private:
private:
OpenCLContext& cl;
OpenCLContext& cl;
vector<mm_float4> posq;
vector<mm_float4> posq;
OpenCLArray
*
forceTemp;
OpenCLArray forceTemp;
cl::Kernel addForcesKernel;
cl::Kernel addForcesKernel;
};
};
...
@@ -1577,36 +1538,6 @@ private:
...
@@ -1577,36 +1538,6 @@ private:
};
};
OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (pmeGrid != NULL)
delete pmeGrid;
if (pmeGrid2 != NULL)
delete pmeGrid2;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeBsplineTheta != NULL)
delete pmeBsplineTheta;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (sort != NULL)
if (sort != NULL)
delete sort;
delete sort;
if (fft != NULL)
if (fft != NULL)
...
@@ -1635,7 +1566,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1635,7 +1566,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Initialize nonbonded interactions.
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
int numParticles = force.getNumParticles();
sigmaEpsilon
= OpenCLArray::creat
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon
.initializ
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
vector<mm_float4> posqf(cl.getPaddedNumAtoms(), mm_float4(0,0,0,0));
vector<mm_float4> posqf(cl.getPaddedNumAtoms(), mm_float4(0,0,0,0));
vector<mm_double4> posqd(cl.getPaddedNumAtoms(), mm_double4(0,0,0,0));
vector<mm_double4> posqd(cl.getPaddedNumAtoms(), mm_double4(0,0,0,0));
vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms(), mm_float2(0,0));
vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms(), mm_float2(0,0));
...
@@ -1671,7 +1602,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1671,7 +1602,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cl.getPosq().upload(posqd);
cl.getPosq().upload(posqd);
else
else
cl.getPosq().upload(posqf);
cl.getPosq().upload(posqf);
sigmaEpsilon
->
upload(sigmaEpsilonVector);
sigmaEpsilon
.
upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
...
@@ -1726,7 +1657,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1726,7 +1657,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
cosSinSums
= new OpenCLArray
(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
cosSinSums
.initialize
(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
}
}
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
...
@@ -1796,26 +1727,26 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1796,26 +1727,26 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME)
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
pmeGrid
= new OpenCLArray
(cl, gridElements, 2*elementSize, "pmeGrid");
pmeGrid
.initialize
(cl, gridElements, 2*elementSize, "pmeGrid");
pmeGrid2
= new OpenCLArray
(cl, gridElements, 2*elementSize, "pmeGrid2");
pmeGrid2
.initialize
(cl, gridElements, 2*elementSize, "pmeGrid2");
if (cl.getSupports64BitGlobalAtomics())
if (cl.getSupports64BitGlobalAtomics())
cl.addAutoclearBuffer(
*
pmeGrid2);
cl.addAutoclearBuffer(pmeGrid2);
else
else
cl.addAutoclearBuffer(
*
pmeGrid);
cl.addAutoclearBuffer(pmeGrid);
pmeBsplineModuliX
= new OpenCLArray
(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliX
.initialize
(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY
= new OpenCLArray
(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliY
.initialize
(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ
= new OpenCLArray
(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeBsplineModuliZ
.initialize
(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
if (doLJPME) {
pmeDispersionBsplineModuliX
= new OpenCLArray
(cl, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliX
.initialize
(cl, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY
= new OpenCLArray
(cl, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliY
.initialize
(cl, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ
= new OpenCLArray
(cl, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
pmeDispersionBsplineModuliZ
.initialize
(cl, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
}
pmeBsplineTheta
= new OpenCLArray
(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeBsplineTheta
.initialize
(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange
= OpenCLArray::creat
e<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomRange
.initializ
e<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex
= OpenCLArray::creat
e<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
pmeAtomGridIndex
.initializ
e<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
int energyElementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer
= new OpenCLArray
(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
pmeEnergyBuffer
.initialize
(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cl.clearBuffer(
*
pmeEnergyBuffer);
cl.clearBuffer(pmeEnergyBuffer);
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ, true);
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ, true);
if (doLJPME)
if (doLJPME)
...
@@ -1832,7 +1763,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1832,7 +1763,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (recipForceGroup < 0)
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
recipForceGroup = force.getForceGroup();
cl.addPreComputation(new SyncQueuePreComputation(cl, pmeQueue, recipForceGroup));
cl.addPreComputation(new SyncQueuePreComputation(cl, pmeQueue, recipForceGroup));
cl.addPostComputation(syncQueue = new SyncQueuePostComputation(cl, pmeSyncEvent,
*
pmeEnergyBuffer, recipForceGroup));
cl.addPostComputation(syncQueue = new SyncQueuePostComputation(cl, pmeSyncEvent, pmeEnergyBuffer, recipForceGroup));
}
}
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
...
@@ -1844,9 +1775,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1844,9 +1775,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
xsize = gridSizeX;
xsize = gridSizeX;
ysize = gridSizeY;
ysize = gridSizeY;
zsize = gridSizeZ;
zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX;
xmoduli =
&
pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY;
ymoduli =
&
pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ;
zmoduli =
&
pmeBsplineModuliZ;
}
}
else {
else {
if (!doLJPME)
if (!doLJPME)
...
@@ -1854,9 +1785,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1854,9 +1785,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
xsize = dispersionGridSizeX;
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX;
xmoduli =
&
pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY;
ymoduli =
&
pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ;
zmoduli =
&
pmeDispersionBsplineModuliZ;
}
}
int maxSize = max(max(xsize, ysize), zsize);
int maxSize = max(max(xsize, ysize), zsize);
vector<double> data(PmeOrder);
vector<double> data(PmeOrder);
...
@@ -1938,7 +1869,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1938,7 +1869,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
if (hasLJ)
if (hasLJ)
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon
->
getDeviceBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon
.
getDeviceBuffer()));
// Initialize the exceptions.
// Initialize the exceptions.
...
@@ -1949,7 +1880,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1949,7 +1880,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (numExceptions > 0) {
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams
= OpenCLArray::creat
e<mm_float4>(cl, numExceptions, "exceptionParams");
exceptionParams
.initializ
e<mm_float4>(cl, numExceptions, "exceptionParams");
vector<mm_float4> exceptionParamsVector(numExceptions);
vector<mm_float4> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
double chargeProd, sigma, epsilon;
...
@@ -1957,9 +1888,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1957,9 +1888,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
}
exceptionParams
->
upload(exceptionParamsVector);
exceptionParams
.
upload(exceptionParamsVector);
map<string, string> replacements;
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams
->
getDeviceBuffer(), "float4");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams
.
getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
}
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
...
@@ -1970,15 +1901,15 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1970,15 +1901,15 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (!hasInitializedKernel) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
hasInitializedKernel = true;
if (cosSinSums
!= NULL
) {
if (cosSinSums
.isInitialized()
) {
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(2, cosSinSums
->
getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(2, cosSinSums
.
getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(0, cl.getForceBuffers().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(0, cl.getForceBuffers().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(2, cosSinSums
->
getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(2, cosSinSums
.
getDeviceBuffer());
}
}
if (pmeGrid
!= NULL
) {
if (pmeGrid
.isInitialized()
) {
// Create kernels for Coulomb PME.
// Create kernels for Coulomb PME.
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
...
@@ -1991,39 +1922,39 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1991,39 +1922,39 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta
->
getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta
.
getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex
->
getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex
.
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
->
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
.
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange
->
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange
.
getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
->
getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
.
getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex
.
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange
.
getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics())
if (cl.getSupports64BitGlobalAtomics())
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2
.
getDeviceBuffer());
else
else
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid
.
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta
->
getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta
.
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(1, pmeBsplineModuliX
->
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(1, pmeBsplineModuliX
.
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(2, pmeBsplineModuliY
->
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(2, pmeBsplineModuliY
.
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(3, pmeBsplineModuliZ
->
getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(3, pmeBsplineModuliZ
.
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer
->
getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer
.
getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(2, pmeBsplineModuliX
->
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(2, pmeBsplineModuliX
.
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(3, pmeBsplineModuliY
->
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(3, pmeBsplineModuliY
.
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(4, pmeBsplineModuliZ
->
getDeviceBuffer());
pmeEvalEnergyKernel.setArg<cl::Buffer>(4, pmeBsplineModuliZ
.
getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid
->
getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid
.
getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex
->
getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex
.
getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) {
if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid
->
getDeviceBuffer());
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid
.
getDeviceBuffer());
}
}
if (usePmeQueue)
if (usePmeQueue)
syncQueue->setKernel(cl::Kernel(program, "addEnergy"));
syncQueue->setKernel(cl::Kernel(program, "addEnergy"));
...
@@ -2048,50 +1979,50 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2048,50 +1979,50 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeDispersionInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
pmeDispersionInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta
->
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta
.
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeDispersionUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(12, sigmaEpsilon
->
getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg<cl::Buffer>(12, sigmaEpsilon
.
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange
->
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange
.
getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeDispersionZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange
.
getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics())
if (cl.getSupports64BitGlobalAtomics())
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid2
.
getDeviceBuffer());
else
else
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(3, pmeGrid
.
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(4, pmeBsplineTheta
.
getDeviceBuffer());
if (deviceIsCpu || cl.getSupports64BitGlobalAtomics())
if (deviceIsCpu || cl.getSupports64BitGlobalAtomics())
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(13, sigmaEpsilon
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(13, sigmaEpsilon
.
getDeviceBuffer());
else
else
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(5, sigmaEpsilon
->
getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg<cl::Buffer>(5, sigmaEpsilon
.
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(1, pmeDispersionBsplineModuliX
->
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(1, pmeDispersionBsplineModuliX
.
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliY
->
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliY
.
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliZ
->
getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliZ
.
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer
->
getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(1, usePmeQueue ? pmeEnergyBuffer
.
getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliX
->
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(2, pmeDispersionBsplineModuliX
.
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliY
->
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(3, pmeDispersionBsplineModuliY
.
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(4, pmeDispersionBsplineModuliZ
->
getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg<cl::Buffer>(4, pmeDispersionBsplineModuliZ
.
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid
->
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid
.
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex
->
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex
.
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(12, sigmaEpsilon
->
getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg<cl::Buffer>(12, sigmaEpsilon
.
getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) {
if (cl.getSupports64BitGlobalAtomics()) {
pmeDispersionFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeDispersionFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2
->
getDeviceBuffer());
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2
.
getDeviceBuffer());
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid
->
getDeviceBuffer());
pmeDispersionFinishSpreadChargeKernel.setArg<cl::Buffer>(1, pmeGrid
.
getDeviceBuffer());
}
}
}
}
}
}
}
}
if (cosSinSums
!= NULL
&& includeReciprocal) {
if (cosSinSums
.isInitialized()
&& includeReciprocal) {
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 recipBoxSize = mm_double4(2*M_PI/boxSize.x, 2*M_PI/boxSize.y, 2*M_PI/boxSize.z, 0.0);
mm_double4 recipBoxSize = mm_double4(2*M_PI/boxSize.x, 2*M_PI/boxSize.y, 2*M_PI/boxSize.z, 0.0);
double recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z);
double recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z);
...
@@ -2107,10 +2038,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2107,10 +2038,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
ewaldForcesKernel.setArg<mm_float4>(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldForcesKernel.setArg<mm_float4>(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldForcesKernel.setArg<cl_float>(4, (cl_float) recipCoefficient);
ewaldForcesKernel.setArg<cl_float>(4, (cl_float) recipCoefficient);
}
}
cl.executeKernel(ewaldSumsKernel, cosSinSums
->
getSize());
cl.executeKernel(ewaldSumsKernel, cosSinSums
.
getSize());
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
}
}
if (pmeGrid
!= NULL
&& includeReciprocal) {
if (pmeGrid
.isInitialized()
&& includeReciprocal) {
if (usePmeQueue && !includeEnergy)
if (usePmeQueue && !includeEnergy)
cl.setQueue(pmeQueue);
cl.setQueue(pmeQueue);
...
@@ -2157,7 +2088,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2157,7 +2088,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
}
else {
else {
sort->sort(
*
pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
if (cl.getUseDoublePrecision()) {
...
@@ -2184,7 +2115,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2184,7 +2115,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
}
}
}
}
fft->execFFT(
*
pmeGrid,
*
pmeGrid2, true);
fft->execFFT(pmeGrid, pmeGrid2, true);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) {
if (cl.getUseDoublePrecision()) {
pmeConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
pmeConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
...
@@ -2205,7 +2136,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2205,7 +2136,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (includeEnergy)
if (includeEnergy)
cl.executeKernel(pmeEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(
*
pmeGrid2,
*
pmeGrid, false);
fft->execFFT(pmeGrid2, pmeGrid, false);
setPeriodicBoxArgs(cl, pmeInterpolateForceKernel, 3);
setPeriodicBoxArgs(cl, pmeInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) {
if (cl.getUseDoublePrecision()) {
pmeInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
pmeInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
...
@@ -2236,7 +2167,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2236,7 +2167,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
}
cl.executeKernel(pmeDispersionUpdateBsplinesKernel, cl.getNumAtoms());
cl.executeKernel(pmeDispersionUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(
*
pmeGrid);
cl.clearBuffer(pmeGrid);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
if (cl.getUseDoublePrecision()) {
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
...
@@ -2251,9 +2182,9 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2251,9 +2182,9 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
cl.executeKernel(pmeDispersionSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
}
else {
else {
sort->sort(
*
pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
if (cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(
*
pmeGrid2);
cl.clearBuffer(pmeGrid2);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
setPeriodicBoxArgs(cl, pmeDispersionSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
if (cl.getUseDoublePrecision()) {
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeDispersionSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
...
@@ -2269,7 +2200,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2269,7 +2200,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeDispersionFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ);
}
}
else {
else {
cl.clearBuffer(
*
pmeGrid);
cl.clearBuffer(pmeGrid);
cl.executeKernel(pmeDispersionAtomRangeKernel, cl.getNumAtoms());
cl.executeKernel(pmeDispersionAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeDispersionZIndexKernel, 2);
setPeriodicBoxSizeArg(cl, pmeDispersionZIndexKernel, 2);
if (cl.getUseDoublePrecision())
if (cl.getUseDoublePrecision())
...
@@ -2280,7 +2211,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2280,7 +2211,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeDispersionSpreadChargeKernel, cl.getNumAtoms());
}
}
}
}
dispersionFft->execFFT(
*
pmeGrid,
*
pmeGrid2, true);
dispersionFft->execFFT(pmeGrid, pmeGrid2, true);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) {
if (cl.getUseDoublePrecision()) {
pmeDispersionConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
pmeDispersionConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
...
@@ -2301,7 +2232,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -2301,7 +2232,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (includeEnergy)
if (includeEnergy)
cl.executeKernel(pmeDispersionEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeDispersionEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeDispersionConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeDispersionConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(
*
pmeGrid2,
*
pmeGrid, false);
fft->execFFT(pmeGrid2, pmeGrid, false);
setPeriodicBoxArgs(cl, pmeDispersionInterpolateForceKernel, 3);
setPeriodicBoxArgs(cl, pmeDispersionInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) {
if (cl.getUseDoublePrecision()) {
pmeDispersionInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
pmeDispersionInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
...
@@ -2374,7 +2305,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -2374,7 +2305,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
sumSquaredCharges += charge*charge;
sumSquaredCharges += charge*charge;
}
}
cl.setCharges(chargeVector);
cl.setCharges(chargeVector);
sigmaEpsilon
->
upload(sigmaEpsilonVector);
sigmaEpsilon
.
upload(sigmaEpsilonVector);
// Record the exceptions.
// Record the exceptions.
...
@@ -2386,7 +2317,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -2386,7 +2317,7 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
}
}
exceptionParams
->
upload(exceptionParamsVector);
exceptionParams
.
upload(exceptionParamsVector);
}
}
// Compute other values.
// Compute other values.
...
@@ -2473,12 +2404,6 @@ private:
...
@@ -2473,12 +2404,6 @@ private:
OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (auto function : tabulatedFunctions)
delete function;
if (forceCopy != NULL)
if (forceCopy != NULL)
delete forceCopy;
delete forceCopy;
}
}
...
@@ -2494,7 +2419,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2494,7 +2419,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int numParticles = force.getNumParticles();
int numParticles = force.getNumParticles();
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
if (force.getNumGlobalParameters() > 0)
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > paramVector(numParticles);
vector<vector<cl_float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
for (int i = 0; i < numParticles; i++) {
...
@@ -2519,7 +2444,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2519,7 +2444,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
vector<string> tableTypes;
for (int i = 0; i < force.getNumFunctions(); i++) {
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
string arrayName = prefix+"table"+cl.intToString(i);
string arrayName = prefix+"table"+cl.intToString(i);
...
@@ -2527,9 +2453,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2527,9 +2453,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions[
i].
upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
tabulatedFunctions.size()-1]->
getDeviceBuffer()));
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
i].
getDeviceBuffer()));
if (width == 1)
if (width == 1)
tableTypes.push_back("float");
tableTypes.push_back("float");
else
else
...
@@ -2544,8 +2470,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2544,8 +2470,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
}
if (globals
!= NULL
)
if (globals
.isInitialized()
)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
...
@@ -2599,9 +2525,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2599,9 +2525,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+cl.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+cl.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals
->
getDeviceBuffer()));
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals
.
getDeviceBuffer()));
}
}
}
}
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
...
@@ -2785,8 +2711,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
...
@@ -2785,8 +2711,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
for (; indexInTileSet < 32; indexInTileSet++)
for (; indexInTileSet < 32; indexInTileSet++)
groupData.push_back(mm_int4(0, 0, minSize<<16, 0));
groupData.push_back(mm_int4(0, 0, minSize<<16, 0));
}
}
interactionGroupData
= OpenCLArray::creat
e<mm_int4>(cl, groupData.size(), "interactionGroupData");
interactionGroupData
.initializ
e<mm_int4>(cl, groupData.size(), "interactionGroupData");
interactionGroupData
->
upload(groupData);
interactionGroupData
.
upload(groupData);
// Create the kernel.
// Create the kernel.
...
@@ -2812,7 +2738,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
...
@@ -2812,7 +2738,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
args<<", __global const "<<buffers[i].getType()<<"* restrict global_params"<<(i+1);
args<<", __global const "<<buffers[i].getType()<<"* restrict global_params"<<(i+1);
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
if (globals
!= NULL
)
if (globals
.isInitialized()
)
args<<", __global const float* restrict globals";
args<<", __global const float* restrict globals";
if (hasParamDerivs)
if (hasParamDerivs)
args << ", __global mixed* restrict energyParamDerivs";
args << ", __global mixed* restrict energyParamDerivs";
...
@@ -2883,7 +2809,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
...
@@ -2883,7 +2809,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
}
}
double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
@@ -2892,7 +2818,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
...
@@ -2892,7 +2818,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed) {
if (changed) {
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
if (forceCopy != NULL) {
if (forceCopy != NULL) {
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
hasInitializedLongRangeCorrection = true;
...
@@ -2903,7 +2829,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
...
@@ -2903,7 +2829,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
hasInitializedLongRangeCorrection = true;
}
}
if (interactionGroupData
!= NULL
) {
if (interactionGroupData
.isInitialized()
) {
if (!hasInitializedKernel) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
hasInitializedKernel = true;
int index = 0;
int index = 0;
...
@@ -2911,14 +2837,14 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
...
@@ -2911,14 +2837,14 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
interactionGroupKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer() : cl.getForceBuffers()).getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer() : cl.getForceBuffers()).getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData
->
getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData
.
getDeviceBuffer());
index += 5;
index += 5;
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
interactionGroupKernel.setArg<cl::Memory>(index++, buffer.getMemory());
interactionGroupKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
interactionGroupKernel.setArg<cl::Memory>(index++, function
->
getDeviceBuffer());
interactionGroupKernel.setArg<cl::Memory>(index++, function
.
getDeviceBuffer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
interactionGroupKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
if (hasParamDerivs)
if (hasParamDerivs)
interactionGroupKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
}
}
...
@@ -2978,43 +2904,26 @@ private:
...
@@ -2978,43 +2904,26 @@ private:
const GBSAOBCForce& force;
const GBSAOBCForce& force;
};
};
OpenCLCalcGBSAOBCForceKernel::~OpenCLCalcGBSAOBCForceKernel() {
if (params != NULL)
delete params;
if (bornSum != NULL)
delete bornSum;
if (longBornSum != NULL)
delete longBornSum;
if (bornRadii != NULL)
delete bornRadii;
if (bornForce != NULL)
delete bornForce;
if (longBornForce != NULL)
delete longBornForce;
if (obcChain != NULL)
delete obcChain;
}
void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
if (cl.getPlatformData().contexts.size() > 1)
if (cl.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple OpenCL devices");
throw OpenMMException("GBSAOBCForce does not support using multiple OpenCL devices");
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
params
= OpenCLArray::creat
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams");
params
.initializ
e<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
bornRadii
= new OpenCLArray
(cl, cl.getPaddedNumAtoms(), elementSize, "bornRadii");
bornRadii
.initialize
(cl, cl.getPaddedNumAtoms(), elementSize, "bornRadii");
obcChain
= new OpenCLArray
(cl, cl.getPaddedNumAtoms(), elementSize, "obcChain");
obcChain
.initialize
(cl, cl.getPaddedNumAtoms(), elementSize, "obcChain");
if (cl.getSupports64BitGlobalAtomics()) {
if (cl.getSupports64BitGlobalAtomics()) {
longBornSum
= OpenCLArray::creat
e<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornSum");
longBornSum
.initializ
e<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornSum");
longBornForce
= OpenCLArray::creat
e<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornForce");
longBornForce
.initializ
e<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornForce");
bornForce
= new OpenCLArray
(cl, cl.getPaddedNumAtoms(), elementSize, "bornForce");
bornForce
.initialize
(cl, cl.getPaddedNumAtoms(), elementSize, "bornForce");
cl.addAutoclearBuffer(
*
longBornSum);
cl.addAutoclearBuffer(longBornSum);
cl.addAutoclearBuffer(
*
longBornForce);
cl.addAutoclearBuffer(longBornForce);
}
}
else {
else {
bornSum
= new OpenCLArray
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornSum");
bornSum
.initialize
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornSum");
bornForce
= new OpenCLArray
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornForce");
bornForce
.initialize
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "bornForce");
cl.addAutoclearBuffer(
*
bornSum);
cl.addAutoclearBuffer(bornSum);
cl.addAutoclearBuffer(
*
bornForce);
cl.addAutoclearBuffer(bornForce);
}
}
vector<mm_float4> posqf(cl.getPaddedNumAtoms());
vector<mm_float4> posqf(cl.getPaddedNumAtoms());
vector<mm_double4> posqd(cl.getPaddedNumAtoms());
vector<mm_double4> posqd(cl.getPaddedNumAtoms());
...
@@ -3034,7 +2943,7 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
...
@@ -3034,7 +2943,7 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
cl.getPosq().upload(posqd);
cl.getPosq().upload(posqd);
else
else
cl.getPosq().upload(posqf);
cl.getPosq().upload(posqf);
params
->
upload(paramsVector);
params
.
upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
...
@@ -3042,8 +2951,8 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
...
@@ -3042,8 +2951,8 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
cutoff = force.getCutoffDistance();
cutoff = force.getCutoffDistance();
string source = OpenCLKernelSources::gbsaObc2;
string source = OpenCLKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params
->
getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params
.
getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce
->
getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce
.
getDeviceBuffer()));;
info = new ForceInfo(nb.getNumForceBuffers(), force);
info = new ForceInfo(nb.getNumForceBuffers(), force);
cl.addForce(info);
cl.addForce(info);
}
}
...
@@ -3089,9 +2998,9 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -3089,9 +2998,9 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
bool useLong = cl.getSupports64BitGlobalAtomics();
bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0;
int index = 0;
computeBornSumKernel = cl::Kernel(program, "computeBornSum");
computeBornSumKernel = cl::Kernel(program, "computeBornSum");
computeBornSumKernel.setArg<cl::Buffer>(index++, (useLong ? longBornSum
->
getDeviceBuffer() : bornSum
->
getDeviceBuffer()));
computeBornSumKernel.setArg<cl::Buffer>(index++, (useLong ? longBornSum
.
getDeviceBuffer() : bornSum
.
getDeviceBuffer()));
computeBornSumKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, params
->
getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, params
.
getDeviceBuffer());
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
...
@@ -3107,10 +3016,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -3107,10 +3016,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
force1Kernel = cl::Kernel(program, "computeGBSAForce1");
force1Kernel = cl::Kernel(program, "computeGBSAForce1");
index = 0;
index = 0;
force1Kernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
force1Kernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
force1Kernel.setArg<cl::Buffer>(index++, (useLong ? longBornForce
->
getDeviceBuffer() : bornForce
->
getDeviceBuffer()));
force1Kernel.setArg<cl::Buffer>(index++, (useLong ? longBornForce
.
getDeviceBuffer() : bornForce
.
getDeviceBuffer()));
force1Kernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, bornRadii
->
getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, bornRadii
.
getDeviceBuffer());
index++; // Whether to include energy.
index++; // Whether to include energy.
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
...
@@ -3131,21 +3040,21 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -3131,21 +3040,21 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
reduceBornSumKernel.setArg<cl_float>(2, 1.0f);
reduceBornSumKernel.setArg<cl_float>(2, 1.0f);
reduceBornSumKernel.setArg<cl_float>(3, 0.8f);
reduceBornSumKernel.setArg<cl_float>(3, 0.8f);
reduceBornSumKernel.setArg<cl_float>(4, 4.85f);
reduceBornSumKernel.setArg<cl_float>(4, 4.85f);
reduceBornSumKernel.setArg<cl::Buffer>(5, (useLong ? longBornSum
->
getDeviceBuffer() : bornSum
->
getDeviceBuffer()));
reduceBornSumKernel.setArg<cl::Buffer>(5, (useLong ? longBornSum
.
getDeviceBuffer() : bornSum
.
getDeviceBuffer()));
reduceBornSumKernel.setArg<cl::Buffer>(6, params
->
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(6, params
.
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(7, bornRadii
->
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(7, bornRadii
.
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(8, obcChain
->
getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(8, obcChain
.
getDeviceBuffer());
reduceBornForceKernel = cl::Kernel(program, "reduceBornForce");
reduceBornForceKernel = cl::Kernel(program, "reduceBornForce");
index = 0;
index = 0;
reduceBornForceKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
reduceBornForceKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
reduceBornForceKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
reduceBornForceKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornForce
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornForce
.
getDeviceBuffer());
if (useLong)
if (useLong)
reduceBornForceKernel.setArg<cl::Buffer>(index++, longBornForce
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, longBornForce
.
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, params
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, params
.
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornRadii
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornRadii
.
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain
->
getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain
.
getDeviceBuffer());
}
}
force1Kernel.setArg<cl_int>(5, includeEnergy);
force1Kernel.setArg<cl_int>(5, includeEnergy);
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
...
@@ -3188,7 +3097,7 @@ void OpenCLCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -3188,7 +3097,7 @@ void OpenCLCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context,
paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
}
}
cl.setCharges(chargeVector);
cl.setCharges(chargeVector);
params
->
upload(paramsVector);
params
.
upload(paramsVector);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -3235,18 +3144,6 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
...
@@ -3235,18 +3144,6 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
delete energyDerivs;
delete energyDerivs;
if (energyDerivChain != NULL)
if (energyDerivChain != NULL)
delete energyDerivChain;
delete energyDerivChain;
if (longEnergyDerivs != NULL)
delete longEnergyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
if (longValueBuffers != NULL)
delete longValueBuffers;
for (auto function : tabulatedFunctions)
delete function;
for (auto d : dValue0dParam)
delete d;
for (auto d : dValuedParam)
for (auto d : dValuedParam)
delete d;
delete d;
}
}
...
@@ -3284,7 +3181,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -3284,7 +3181,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
computedValues = new OpenCLParameterSet(cl, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cl.getUseDoublePrecision());
computedValues = new OpenCLParameterSet(cl, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cl.getUseDoublePrecision());
if (force.getNumGlobalParameters() > 0)
if (force.getNumGlobalParameters() > 0)
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customGBGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customGBGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > paramVector(paddedNumParticles, vector<cl_float>(numParams, 0));
vector<vector<cl_float> > paramVector(paddedNumParticles, vector<cl_float>(numParams, 0));
vector<vector<int> > exclusionList(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
for (int i = 0; i < numParticles; i++) {
...
@@ -3308,6 +3205,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -3308,6 +3205,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
...
@@ -3316,9 +3214,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -3316,9 +3214,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions[
i].
upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
tabulatedFunctions.size()-1]->
getDeviceBuffer()));
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
i].
getDeviceBuffer()));
tableArgs << ", __global const float";
tableArgs << ", __global const float";
if (width > 1)
if (width > 1)
tableArgs << width;
tableArgs << width;
...
@@ -3333,8 +3231,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -3333,8 +3231,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
}
if (globals
!= NULL
)
if (globals
.isInitialized()
)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms.
// Record derivatives of expressions needed for the chain rule terms.
...
@@ -3385,7 +3283,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -3385,7 +3283,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
bool useLong = cl.getSupports64BitGlobalAtomics();
bool useLong = cl.getSupports64BitGlobalAtomics();
if (useLong) {
if (useLong) {
longEnergyDerivs
= OpenCLArray::creat
e<cl_long>(cl, force.getNumComputedValues()*cl.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
longEnergyDerivs
.initializ
e<cl_long>(cl, force.getNumComputedValues()*cl.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
}
}
else
else
...
@@ -3393,13 +3291,14 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -3393,13 +3291,14 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
energyDerivChain = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
energyDerivChain = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
dValue0dParam.resize(force.getNumEnergyParameterDerivatives());
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
dValuedParam.push_back(new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "dValuedParam", true, cl.getUseDoublePrecision()));
dValuedParam.push_back(new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "dValuedParam", true, cl.getUseDoublePrecision()));
if (useLong)
if (useLong)
dValue0dParam
.push_back(OpenCLArray::creat
e<cl_long>(cl, cl.getPaddedNumAtoms(), "dValue0dParam")
)
;
dValue0dParam
[i].initializ
e<cl_long>(cl, cl.getPaddedNumAtoms(), "dValue0dParam");
else
else
dValue0dParam
.push_back(new OpenCLArray
(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), elementSize, "dValue0dParam")
)
;
dValue0dParam
[i].initialize
(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), elementSize, "dValue0dParam");
cl.addAutoclearBuffer(
*
dValue0dParam
.back()
);
cl.addAutoclearBuffer(dValue0dParam
[i]
);
string name = force.getEnergyParameterDerivativeName(i);
string name = force.getEnergyParameterDerivativeName(i);
cl.addEnergyParameterDerivative(name);
cl.addEnergyParameterDerivative(name);
}
}
...
@@ -4035,9 +3934,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -4035,9 +3934,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
}
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals
->
getDeviceBuffer()));
arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals
.
getDeviceBuffer()));
}
}
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (auto param : parameters)
for (auto param : parameters)
...
@@ -4048,7 +3947,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -4048,7 +3947,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
cl.addForce(info);
cl.addForce(info);
if (useLong)
if (useLong)
cl.addAutoclearBuffer(
*
longEnergyDerivs);
cl.addAutoclearBuffer(longEnergyDerivs);
else {
else {
for (auto& buffer : energyDerivs->getBuffers())
for (auto& buffer : energyDerivs->getBuffers())
cl.addAutoclearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects());
cl.addAutoclearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects());
...
@@ -4099,21 +3998,21 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4099,21 +3998,21 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
bool useLong = cl.getSupports64BitGlobalAtomics();
bool useLong = cl.getSupports64BitGlobalAtomics();
if (useLong) {
if (useLong) {
longValueBuffers
= OpenCLArray::creat
e<cl_long>(cl, cl.getPaddedNumAtoms(), "customGBLongValueBuffers");
longValueBuffers
.initializ
e<cl_long>(cl, cl.getPaddedNumAtoms(), "customGBLongValueBuffers");
cl.addAutoclearBuffer(
*
longValueBuffers);
cl.addAutoclearBuffer(longValueBuffers);
cl.clearBuffer(
*
longValueBuffers);
cl.clearBuffer(longValueBuffers);
}
}
else {
else {
valueBuffers
= new OpenCLArray
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "customGBValueBuffers");
valueBuffers
.initialize
(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "customGBValueBuffers");
cl.addAutoclearBuffer(
*
valueBuffers);
cl.addAutoclearBuffer(valueBuffers);
cl.clearBuffer(
*
valueBuffers);
cl.clearBuffer(valueBuffers);
}
}
int index = 0;
int index = 0;
pairValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*4*elementSize, NULL);
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*4*elementSize, NULL);
pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusions().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusions().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusionTiles().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusionTiles().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers
->
getDeviceBuffer() : valueBuffers
->
getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers
.
getDeviceBuffer() : valueBuffers
.
getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL);
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL);
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
...
@@ -4126,8 +4025,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4126,8 +4025,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
}
else
else
pairValueKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
pairValueKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
if (globals
!= NULL
)
if (globals
.isInitialized()
)
pairValueKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairValueUsesParam[i]) {
if (pairValueUsesParam[i]) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
...
@@ -4135,30 +4034,30 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4135,30 +4034,30 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*buffer.getSize(), NULL);
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*buffer.getSize(), NULL);
}
}
}
}
for (auto d : dValue0dParam) {
for (auto
&
d : dValue0dParam) {
pairValueKernel.setArg<cl::Buffer>(index++, d
->
getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, d
.
getDeviceBuffer());
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*d
->
getElementSize(), NULL);
pairValueKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*d
.
getElementSize(), NULL);
}
}
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
pairValueKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
index = 0;
index = 0;
perParticleValueKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleValueKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleValueKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleValueKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers
->
getDeviceBuffer() : valueBuffers
->
getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, useLong ? longValueBuffers
.
getDeviceBuffer() : valueBuffers
.
getDeviceBuffer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
perParticleValueKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
for (auto& buffer : computedValues->getBuffers())
perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
perParticleValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (int i = 0; i < dValuedParam.size(); i++) {
for (int i = 0; i < dValuedParam.size(); i++) {
perParticleValueKernel.setArg<cl::Memory>(index++, dValue0dParam[i]
->
getDeviceBuffer());
perParticleValueKernel.setArg<cl::Memory>(index++, dValue0dParam[i]
.
getDeviceBuffer());
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
perParticleValueKernel.setArg<cl::Memory>(index++, dValuedParam[i]->getBuffers()[j].getMemory());
perParticleValueKernel.setArg<cl::Memory>(index++, dValuedParam[i]->getBuffers()[j].getMemory());
}
}
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
perParticleValueKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
index = 0;
index = 0;
pairEnergyKernel.setArg<cl::Buffer>(index++, useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
...
@@ -4179,8 +4078,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4179,8 +4078,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
}
else
else
pairEnergyKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
pairEnergyKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
if (globals
!= NULL
)
if (globals
.isInitialized()
)
pairEnergyKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairEnergyUsesParam[i]) {
if (pairEnergyUsesParam[i]) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
...
@@ -4196,7 +4095,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4196,7 +4095,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
}
}
}
if (useLong) {
if (useLong) {
pairEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs
->
getDeviceBuffer());
pairEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs
.
getDeviceBuffer());
for (int i = 0; i < numComputedValues; ++i)
for (int i = 0; i < numComputedValues; ++i)
pairEnergyKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL);
pairEnergyKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL);
}
}
...
@@ -4208,16 +4107,16 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4208,16 +4107,16 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
}
if (needEnergyParamDerivs)
if (needEnergyParamDerivs)
pairEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
pairEnergyKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
index = 0;
index = 0;
perParticleEnergyKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleEnergyKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleEnergyKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleEnergyKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
for (auto& buffer : computedValues->getBuffers())
...
@@ -4227,17 +4126,17 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4227,17 +4126,17 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
for (auto& buffer : energyDerivChain->getBuffers())
for (auto& buffer : energyDerivChain->getBuffers())
perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
perParticleEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
if (useLong)
if (useLong)
perParticleEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs
->
getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs
.
getDeviceBuffer());
if (needEnergyParamDerivs)
if (needEnergyParamDerivs)
perParticleEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
if (needParameterGradient || needEnergyParamDerivs) {
if (needParameterGradient || needEnergyParamDerivs) {
index = 0;
index = 0;
gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
gradientChainRuleKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
gradientChainRuleKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
gradientChainRuleKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
gradientChainRuleKernel.setArg<cl::Memory>(index++, buffer.getMemory());
gradientChainRuleKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
for (auto& buffer : computedValues->getBuffers())
...
@@ -4252,7 +4151,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4252,7 +4151,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
}
}
}
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
@@ -4261,7 +4160,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4261,7 +4160,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
pairEnergyKernel.setArg<cl_int>(7, includeEnergy);
pairEnergyKernel.setArg<cl_int>(7, includeEnergy);
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
...
@@ -4342,8 +4241,6 @@ private:
...
@@ -4342,8 +4241,6 @@ private:
OpenCLCalcCustomExternalForceKernel::~OpenCLCalcCustomExternalForceKernel() {
OpenCLCalcCustomExternalForceKernel::~OpenCLCalcCustomExternalForceKernel() {
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
}
}
void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
...
@@ -4398,9 +4295,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
...
@@ -4398,9 +4295,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
variables[name] = "particleParams"+params->getParameterSuffix(i);
variables[name] = "particleParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customExternalGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customExternalGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
...
@@ -4422,7 +4319,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
...
@@ -4422,7 +4319,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
}
}
double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
@@ -4431,7 +4328,7 @@ double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool i
...
@@ -4431,7 +4328,7 @@ double OpenCLCalcCustomExternalForceKernel::execute(ContextImpl& context, bool i
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -4544,22 +4441,6 @@ OpenCLCalcCustomHbondForceKernel::~OpenCLCalcCustomHbondForceKernel() {
...
@@ -4544,22 +4441,6 @@ OpenCLCalcCustomHbondForceKernel::~OpenCLCalcCustomHbondForceKernel() {
delete donorParams;
delete donorParams;
if (acceptorParams != NULL)
if (acceptorParams != NULL)
delete acceptorParams;
delete acceptorParams;
if (donors != NULL)
delete donors;
if (acceptors != NULL)
delete acceptors;
if (donorBufferIndices != NULL)
delete donorBufferIndices;
if (acceptorBufferIndices != NULL)
delete acceptorBufferIndices;
if (globals != NULL)
delete globals;
if (donorExclusions != NULL)
delete donorExclusions;
if (acceptorExclusions != NULL)
delete acceptorExclusions;
for (auto function : tabulatedFunctions)
delete function;
}
}
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
...
@@ -4586,12 +4467,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4586,12 +4467,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
if (numDonors == 0 || numAcceptors == 0)
if (numDonors == 0 || numAcceptors == 0)
return;
return;
int numParticles = system.getNumParticles();
int numParticles = system.getNumParticles();
donors
= OpenCLArray::creat
e<mm_int4>(cl, numDonors, "customHbondDonors");
donors
.initializ
e<mm_int4>(cl, numDonors, "customHbondDonors");
acceptors
= OpenCLArray::creat
e<mm_int4>(cl, numAcceptors, "customHbondAcceptors");
acceptors
.initializ
e<mm_int4>(cl, numAcceptors, "customHbondAcceptors");
donorParams = new OpenCLParameterSet(cl, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
donorParams = new OpenCLParameterSet(cl, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
acceptorParams = new OpenCLParameterSet(cl, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
acceptorParams = new OpenCLParameterSet(cl, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
if (force.getNumGlobalParameters() > 0)
if (force.getNumGlobalParameters() > 0)
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customHbondGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customHbondGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > donorParamVector(numDonors);
vector<vector<cl_float> > donorParamVector(numDonors);
vector<mm_int4> donorVector(numDonors);
vector<mm_int4> donorVector(numDonors);
for (int i = 0; i < numDonors; i++) {
for (int i = 0; i < numDonors; i++) {
...
@@ -4601,7 +4482,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4601,7 +4482,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (int j = 0; j < (int) parameters.size(); j++)
for (int j = 0; j < (int) parameters.size(); j++)
donorParamVector[i][j] = (cl_float) parameters[j];
donorParamVector[i][j] = (cl_float) parameters[j];
}
}
donors
->
upload(donorVector);
donors
.
upload(donorVector);
donorParams->setParameterValues(donorParamVector);
donorParams->setParameterValues(donorParamVector);
vector<vector<cl_float> > acceptorParamVector(numAcceptors);
vector<vector<cl_float> > acceptorParamVector(numAcceptors);
vector<mm_int4> acceptorVector(numAcceptors);
vector<mm_int4> acceptorVector(numAcceptors);
...
@@ -4612,13 +4493,13 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4612,13 +4493,13 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (int j = 0; j < (int) parameters.size(); j++)
for (int j = 0; j < (int) parameters.size(); j++)
acceptorParamVector[i][j] = (cl_float) parameters[j];
acceptorParamVector[i][j] = (cl_float) parameters[j];
}
}
acceptors
->
upload(acceptorVector);
acceptors
.
upload(acceptorVector);
acceptorParams->setParameterValues(acceptorParamVector);
acceptorParams->setParameterValues(acceptorParamVector);
// Select an output buffer index for each donor and acceptor.
// Select an output buffer index for each donor and acceptor.
donorBufferIndices
= OpenCLArray::creat
e<mm_int4>(cl, numDonors, "customHbondDonorBuffers");
donorBufferIndices
.initializ
e<mm_int4>(cl, numDonors, "customHbondDonorBuffers");
acceptorBufferIndices
= OpenCLArray::creat
e<mm_int4>(cl, numAcceptors, "customHbondAcceptorBuffers");
acceptorBufferIndices
.initializ
e<mm_int4>(cl, numAcceptors, "customHbondAcceptorBuffers");
vector<mm_int4> donorBufferVector(numDonors);
vector<mm_int4> donorBufferVector(numDonors);
vector<mm_int4> acceptorBufferVector(numAcceptors);
vector<mm_int4> acceptorBufferVector(numAcceptors);
vector<int> donorBufferCounter(numParticles, 0);
vector<int> donorBufferCounter(numParticles, 0);
...
@@ -4631,8 +4512,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4631,8 +4512,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
acceptorBufferVector[i] = mm_int4(acceptorVector[i].x > -1 ? acceptorBufferCounter[acceptorVector[i].x]++ : 0,
acceptorBufferVector[i] = mm_int4(acceptorVector[i].x > -1 ? acceptorBufferCounter[acceptorVector[i].x]++ : 0,
acceptorVector[i].y > -1 ? acceptorBufferCounter[acceptorVector[i].y]++ : 0,
acceptorVector[i].y > -1 ? acceptorBufferCounter[acceptorVector[i].y]++ : 0,
acceptorVector[i].z > -1 ? acceptorBufferCounter[acceptorVector[i].z]++ : 0, 0);
acceptorVector[i].z > -1 ? acceptorBufferCounter[acceptorVector[i].z]++ : 0, 0);
donorBufferIndices
->
upload(donorBufferVector);
donorBufferIndices
.
upload(donorBufferVector);
acceptorBufferIndices
->
upload(acceptorBufferVector);
acceptorBufferIndices
.
upload(acceptorBufferVector);
int maxBuffers = 1;
int maxBuffers = 1;
for (int i : donorBufferCounter)
for (int i : donorBufferCounter)
maxBuffers = max(maxBuffers, i);
maxBuffers = max(maxBuffers, i);
...
@@ -4672,10 +4553,10 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4672,10 +4553,10 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
else
else
throw OpenMMException("CustomHbondForce: OpenCLPlatform does not support more than four exclusions per acceptor");
throw OpenMMException("CustomHbondForce: OpenCLPlatform does not support more than four exclusions per acceptor");
}
}
donorExclusions
= OpenCLArray::creat
e<mm_int4>(cl, numDonors, "customHbondDonorExclusions");
donorExclusions
.initializ
e<mm_int4>(cl, numDonors, "customHbondDonorExclusions");
acceptorExclusions
= OpenCLArray::creat
e<mm_int4>(cl, numAcceptors, "customHbondAcceptorExclusions");
acceptorExclusions
.initializ
e<mm_int4>(cl, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions
->
upload(donorExclusionVector);
donorExclusions
.
upload(donorExclusionVector);
acceptorExclusions
->
upload(acceptorExclusionVector);
acceptorExclusions
.
upload(acceptorExclusionVector);
// Record the tabulated functions.
// Record the tabulated functions.
...
@@ -4683,6 +4564,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4683,6 +4564,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumFunctions());
for (int i = 0; i < force.getNumFunctions(); i++) {
for (int i = 0; i < force.getNumFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
...
@@ -4691,8 +4573,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4691,8 +4573,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions[
i].
upload(f);
tableArgs << ", __global const float";
tableArgs << ", __global const float";
if (width > 1)
if (width > 1)
tableArgs << width;
tableArgs << width;
...
@@ -4707,8 +4589,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4707,8 +4589,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
}
if (globals
!= NULL
)
if (globals
.isInitialized()
)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
map<string, string> variables;
map<string, string> variables;
for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
const string& name = force.getPerDonorParameterName(i);
const string& name = force.getPerDonorParameterName(i);
...
@@ -4900,7 +4782,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4900,7 +4782,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numDonors == 0 || numAcceptors == 0)
if (numDonors == 0 || numAcceptors == 0)
return 0.0;
return 0.0;
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
@@ -4909,7 +4791,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
...
@@ -4909,7 +4791,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
if (!hasInitializedKernel) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
hasInitializedKernel = true;
...
@@ -4917,38 +4799,38 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
...
@@ -4917,38 +4799,38 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
donorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorExclusions
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorExclusions
.
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donors
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donors
.
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, acceptors
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, acceptors
.
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorBufferIndices
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorBufferIndices
.
getDeviceBuffer());
donorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
donorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 5; // Periodic box size arguments are set when the kernel is executed.
index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals
!= NULL
)
if (globals
.isInitialized()
)
donorKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : donorParams->getBuffers())
for (auto& buffer : donorParams->getBuffers())
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
for (auto& buffer : acceptorParams->getBuffers())
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
donorKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
index = 0;
index = 0;
acceptorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorExclusions
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorExclusions
.
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, donors
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, donors
.
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptors
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptors
.
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorBufferIndices
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorBufferIndices
.
getDeviceBuffer());
acceptorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
acceptorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 5; // Periodic box size arguments are set when the kernel is executed.
index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals
!= NULL
)
if (globals
.isInitialized()
)
acceptorKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : donorParams->getBuffers())
for (auto& buffer : donorParams->getBuffers())
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
for (auto& buffer : acceptorParams->getBuffers())
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
acceptorKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
}
}
setPeriodicBoxArgs(cl, donorKernel, 8);
setPeriodicBoxArgs(cl, donorKernel, 8);
cl.executeKernel(donorKernel, max(numDonors, numAcceptors));
cl.executeKernel(donorKernel, max(numDonors, numAcceptors));
...
@@ -5047,22 +4929,6 @@ private:
...
@@ -5047,22 +4929,6 @@ private:
OpenCLCalcCustomCentroidBondForceKernel::~OpenCLCalcCustomCentroidBondForceKernel() {
OpenCLCalcCustomCentroidBondForceKernel::~OpenCLCalcCustomCentroidBondForceKernel() {
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (auto function : tabulatedFunctions)
delete function;
}
}
void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
...
@@ -5100,22 +4966,22 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
...
@@ -5100,22 +4966,22 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
for (int j = 0; j < normalizedWeights[i].size(); j++)
for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
}
}
groupParticles
= OpenCLArray::creat
e<int>(cl, groupParticleVec.size(), "groupParticles");
groupParticles
.initializ
e<int>(cl, groupParticleVec.size(), "groupParticles");
groupParticles
->
upload(groupParticleVec);
groupParticles
.
upload(groupParticleVec);
if (cl.getUseDoublePrecision()) {
if (cl.getUseDoublePrecision()) {
groupWeights
= OpenCLArray::creat
e<double>(cl, groupParticleVec.size(), "groupWeights");
groupWeights
.initializ
e<double>(cl, groupParticleVec.size(), "groupWeights");
groupWeights
->
upload(groupWeightVecDouble);
groupWeights
.
upload(groupWeightVecDouble);
centerPositions
= OpenCLArray::creat
e<mm_double4>(cl, numGroups, "centerPositions");
centerPositions
.initializ
e<mm_double4>(cl, numGroups, "centerPositions");
}
}
else {
else {
groupWeights
= OpenCLArray::creat
e<float>(cl, groupParticleVec.size(), "groupWeights");
groupWeights
.initializ
e<float>(cl, groupParticleVec.size(), "groupWeights");
groupWeights
->
upload(groupWeightVecFloat);
groupWeights
.
upload(groupWeightVecFloat);
centerPositions
= OpenCLArray::creat
e<mm_float4>(cl, numGroups, "centerPositions");
centerPositions
.initializ
e<mm_float4>(cl, numGroups, "centerPositions");
}
}
groupOffsets
= OpenCLArray::creat
e<int>(cl, groupOffsetVec.size(), "groupOffsets");
groupOffsets
.initializ
e<int>(cl, groupOffsetVec.size(), "groupOffsets");
groupOffsets
->
upload(groupOffsetVec);
groupOffsets
.
upload(groupOffsetVec);
groupForces
= OpenCLArray::creat
e<long long>(cl, numGroups*3, "groupForces");
groupForces
.initializ
e<long long>(cl, numGroups*3, "groupForces");
cl.addAutoclearBuffer(
*
groupForces);
cl.addAutoclearBuffer(groupForces);
// Record the bonds.
// Record the bonds.
...
@@ -5134,8 +5000,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
...
@@ -5134,8 +5000,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
paramVector[i][j] = (float) parameters[j];
paramVector[i][j] = (float) parameters[j];
}
}
params->setParameterValues(paramVector);
params->setParameterValues(paramVector);
bondGroups
= OpenCLArray::creat
e<int>(cl, bondGroupVec.size(), "bondGroups");
bondGroups
.initializ
e<int>(cl, bondGroupVec.size(), "bondGroups");
bondGroups
->
upload(bondGroupVec);
bondGroups
.
upload(bondGroupVec);
// Record the tabulated functions.
// Record the tabulated functions.
...
@@ -5143,6 +5009,7 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
...
@@ -5143,6 +5009,7 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
stringstream extraArgs;
stringstream extraArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
...
@@ -5151,8 +5018,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
...
@@ -5151,8 +5018,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions
.back()->
upload(f);
tabulatedFunctions
[i].
upload(f);
extraArgs << ", __global const float";
extraArgs << ", __global const float";
if (width > 1)
if (width > 1)
extraArgs << width;
extraArgs << width;
...
@@ -5182,8 +5049,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
...
@@ -5182,8 +5049,8 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
if (needEnergyParamDerivs)
if (needEnergyParamDerivs)
extraArgs << ", __global mixed* restrict energyParamDerivs";
extraArgs << ", __global mixed* restrict energyParamDerivs";
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<float>(cl, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals
.initializ
e<float>(cl, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
extraArgs << ", __global const float* restrict globals";
extraArgs << ", __global const float* restrict globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
...
@@ -5394,37 +5261,37 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
...
@@ -5394,37 +5261,37 @@ void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, c
index = 0;
index = 0;
computeCentersKernel = cl::Kernel(program, "computeGroupCenters");
computeCentersKernel = cl::Kernel(program, "computeGroupCenters");
computeCentersKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupParticles
->
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupParticles
.
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupWeights
->
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupWeights
.
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupOffsets
->
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupOffsets
.
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, centerPositions
->
getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, centerPositions
.
getDeviceBuffer());
index = 0;
index = 0;
groupForcesKernel = cl::Kernel(program, "computeGroupForces");
groupForcesKernel = cl::Kernel(program, "computeGroupForces");
groupForcesKernel.setArg<cl::Buffer>(index++, groupForces
->
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, groupForces
.
getDeviceBuffer());
index++; // Energy buffer hasn't been created yet
index++; // Energy buffer hasn't been created yet
groupForcesKernel.setArg<cl::Buffer>(index++, centerPositions
->
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, centerPositions
.
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, bondGroups
->
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, bondGroups
.
getDeviceBuffer());
index += 5; // Periodic box information
index += 5; // Periodic box information
if (needEnergyParamDerivs)
if (needEnergyParamDerivs)
index++; // Deriv buffer hasn't been created yet.
index++; // Deriv buffer hasn't been created yet.
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
groupForcesKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
groupForcesKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
groupForcesKernel.setArg<cl::Memory>(index++, buffer.getMemory());
groupForcesKernel.setArg<cl::Memory>(index++, buffer.getMemory());
index = 0;
index = 0;
applyForcesKernel = cl::Kernel(program, "applyForcesToAtoms");
applyForcesKernel = cl::Kernel(program, "applyForcesToAtoms");
applyForcesKernel.setArg<cl::Buffer>(index++, groupParticles
->
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupParticles
.
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupWeights
->
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupWeights
.
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupOffsets
->
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupOffsets
.
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupForces
->
getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupForces
.
getDeviceBuffer());
}
}
double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
if (numBonds == 0)
return 0.0;
return 0.0;
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -5433,7 +5300,7 @@ double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bo
...
@@ -5433,7 +5300,7 @@ double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bo
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
cl.executeKernel(computeCentersKernel, OpenCLContext::TileSize*numGroups);
cl.executeKernel(computeCentersKernel, OpenCLContext::TileSize*numGroups);
groupForcesKernel.setArg<cl::Buffer>(1, cl.getEnergyBuffer().getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(1, cl.getEnergyBuffer().getDeviceBuffer());
...
@@ -5498,10 +5365,6 @@ private:
...
@@ -5498,10 +5365,6 @@ private:
OpenCLCalcCustomCompoundBondForceKernel::~OpenCLCalcCustomCompoundBondForceKernel() {
OpenCLCalcCustomCompoundBondForceKernel::~OpenCLCalcCustomCompoundBondForceKernel() {
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
for (auto function : tabulatedFunctions)
delete function;
}
}
void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
...
@@ -5532,16 +5395,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -5532,16 +5395,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
stringstream tableArgs;
for (int i = 0; i < force.getNumFunctions(); i++) {
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
OpenCLArray* array = OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[i].initialize<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions.push_back(array);
tabulatedFunctions[i].upload(f);
array->upload(f);
string arrayName = cl.getBondedUtilities().addArgument(tabulatedFunctions[i].getDeviceBuffer(), width == 1 ? "float" : "float"+cl.intToString(width));
string arrayName = cl.getBondedUtilities().addArgument(array->getDeviceBuffer(), width == 1 ? "float" : "float"+cl.intToString(width));
functionDefinitions.push_back(make_pair(name, arrayName));
functionDefinitions.push_back(make_pair(name, arrayName));
}
}
...
@@ -5565,9 +5428,9 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -5565,9 +5428,9 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
variables[name] = "bondParams"+params->getParameterSuffix(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customCompoundBondGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customCompoundBondGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals
->
getDeviceBuffer(), "float");
string argName = cl.getBondedUtilities().addArgument(globals
.
getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
...
@@ -5750,7 +5613,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
...
@@ -5750,7 +5613,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
}
}
double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
@@ -5759,7 +5622,7 @@ double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bo
...
@@ -5759,7 +5622,7 @@ double OpenCLCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bo
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -5827,34 +5690,6 @@ private:
...
@@ -5827,34 +5690,6 @@ private:
OpenCLCalcCustomManyParticleForceKernel::~OpenCLCalcCustomManyParticleForceKernel() {
OpenCLCalcCustomManyParticleForceKernel::~OpenCLCalcCustomManyParticleForceKernel() {
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
if (orderIndex != NULL)
delete orderIndex;
if (particleOrder != NULL)
delete particleOrder;
if (particleTypes != NULL)
delete particleTypes;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighborPairs != NULL)
delete neighborPairs;
if (numNeighborPairs != NULL)
delete numNeighborPairs;
if (neighborStartIndex != NULL)
delete neighborStartIndex;
if (neighbors != NULL)
delete neighbors;
if (numNeighborsForAtom != NULL)
delete numNeighborsForAtom;
for (auto function : tabulatedFunctions)
delete function;
}
}
void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
...
@@ -5889,6 +5724,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
...
@@ -5889,6 +5724,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
...
@@ -5897,8 +5733,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
...
@@ -5897,8 +5733,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(OpenCLArray::creat
e<float>(cl, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cl, f.size(), "TabulatedFunction");
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions[
i].
upload(f);
tableArgs << ", __global const float";
tableArgs << ", __global const float";
if (width > 1)
if (width > 1)
tableArgs << width;
tableArgs << width;
...
@@ -5928,8 +5764,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
...
@@ -5928,8 +5764,8 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
}
}
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= OpenCLArray::creat
e<cl_float>(cl, force.getNumGlobalParameters(), "customManyParticleGlobals", CL_MEM_READ_ONLY);
globals
.initializ
e<cl_float>(cl, force.getNumGlobalParameters(), "customManyParticleGlobals", CL_MEM_READ_ONLY);
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cl.intToString(i)+"]";
string value = "globals["+cl.intToString(i)+"]";
...
@@ -5946,16 +5782,16 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
...
@@ -5946,16 +5782,16 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
bool hasTypeFilters = (particleOrderVec.size() > 1);
bool hasTypeFilters = (particleOrderVec.size() > 1);
if (hasTypeFilters) {
if (hasTypeFilters) {
particleTypes
= OpenCLArray::creat
e<int>(cl, particleTypesVec.size(), "customManyParticleTypes");
particleTypes
.initializ
e<int>(cl, particleTypesVec.size(), "customManyParticleTypes");
orderIndex
= OpenCLArray::creat
e<int>(cl, orderIndexVec.size(), "customManyParticleOrderIndex");
orderIndex
.initializ
e<int>(cl, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder
= OpenCLArray::creat
e<int>(cl, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleOrder
.initializ
e<int>(cl, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes
->
upload(particleTypesVec);
particleTypes
.
upload(particleTypesVec);
orderIndex
->
upload(orderIndexVec);
orderIndex
.
upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder
->
getSize());
vector<int> flattenedOrder(particleOrder
.
getSize());
for (int i = 0; i < (int) particleOrderVec.size(); i++)
for (int i = 0; i < (int) particleOrderVec.size(); i++)
for (int j = 0; j < particlesPerSet; j++)
for (int j = 0; j < particlesPerSet; j++)
flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
particleOrder
->
upload(flattenedOrder);
particleOrder
.
upload(flattenedOrder);
}
}
// Build data structures for exclusions.
// Build data structures for exclusions.
...
@@ -5976,10 +5812,10 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
...
@@ -5976,10 +5812,10 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
exclusionStartIndexVec[i+1] = exclusionsVec.size();
exclusionStartIndexVec[i+1] = exclusionsVec.size();
}
}
exclusions
= OpenCLArray::creat
e<int>(cl, exclusionsVec.size(), "customManyParticleExclusions");
exclusions
.initializ
e<int>(cl, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex
= OpenCLArray::creat
e<int>(cl, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusionStartIndex
.initializ
e<int>(cl, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions
->
upload(exclusionsVec);
exclusions
.
upload(exclusionsVec);
exclusionStartIndex
->
upload(exclusionStartIndexVec);
exclusionStartIndex
.
upload(exclusionStartIndexVec);
}
}
// Build data structures for the neighbor list.
// Build data structures for the neighbor list.
...
@@ -5987,18 +5823,18 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
...
@@ -5987,18 +5823,18 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
if (nonbondedMethod != NoCutoff) {
if (nonbondedMethod != NoCutoff) {
int numAtomBlocks = cl.getNumAtomBlocks();
int numAtomBlocks = cl.getNumAtomBlocks();
int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
blockCenter
= new OpenCLArray
(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockCenter
.initialize
(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox
= new OpenCLArray
(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
blockBoundingBox
.initialize
(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs
= OpenCLArray::creat
e<int>(cl, 1, "customManyParticleNumNeighborPairs");
numNeighborPairs
.initializ
e<int>(cl, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex
= OpenCLArray::creat
e<int>(cl, numParticles+1, "customManyParticleNeighborStartIndex");
neighborStartIndex
.initializ
e<int>(cl, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom
= OpenCLArray::creat
e<int>(cl, numParticles, "customManyParticleNumNeighborsForAtom");
numNeighborsForAtom
.initializ
e<int>(cl, numParticles, "customManyParticleNumNeighborsForAtom");
// Select a size for the array that holds the neighbor list. We have to make a fairly
// Select a size for the array that holds the neighbor list. We have to make a fairly
// arbitrary guess, but if this turns out to be too small we'll increase it later.
// arbitrary guess, but if this turns out to be too small we'll increase it later.
maxNeighborPairs = 150*numParticles;
maxNeighborPairs = 150*numParticles;
neighborPairs
= OpenCLArray::creat
e<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs");
neighborPairs
.initializ
e<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors
= OpenCLArray::creat
e<int>(cl, maxNeighborPairs, "customManyParticleNeighbors");
neighbors
.initializ
e<int>(cl, maxNeighborPairs, "customManyParticleNeighbors");
}
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// Now to generate the kernel. First, it needs to calculate all distances, angles,
...
@@ -6317,24 +6153,24 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
...
@@ -6317,24 +6153,24 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
setPeriodicBoxArgs(cl, forceKernel, index);
setPeriodicBoxArgs(cl, forceKernel, index);
index += 5;
index += 5;
if (nonbondedMethod != NoCutoff) {
if (nonbondedMethod != NoCutoff) {
forceKernel.setArg<cl::Buffer>(index++, neighbors
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighbors
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborStartIndex
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborStartIndex
.
getDeviceBuffer());
}
}
if (particleTypes
!= NULL
) {
if (particleTypes
.isInitialized()
) {
forceKernel.setArg<cl::Buffer>(index++, particleTypes
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, particleTypes
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, orderIndex
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, orderIndex
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, particleOrder
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, particleOrder
.
getDeviceBuffer());
}
}
if (exclusions
!= NULL
) {
if (exclusions
.isInitialized()
) {
forceKernel.setArg<cl::Buffer>(index++, exclusions
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions
.
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
.
getDeviceBuffer());
}
}
if (globals
!= NULL
)
if (globals
.isInitialized()
)
forceKernel.setArg<cl::Buffer>(index++, globals
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, globals
.
getDeviceBuffer());
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
forceKernel.setArg<cl::Memory>(index++, buffer.getMemory());
forceKernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
forceKernel.setArg<cl::Buffer>(index++, function
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, function
.
getDeviceBuffer());
if (nonbondedMethod != NoCutoff) {
if (nonbondedMethod != NoCutoff) {
// Set arguments for the block bounds kernel.
// Set arguments for the block bounds kernel.
...
@@ -6343,9 +6179,9 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
...
@@ -6343,9 +6179,9 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
setPeriodicBoxArgs(cl, blockBoundsKernel, index);
setPeriodicBoxArgs(cl, blockBoundsKernel, index);
index += 5;
index += 5;
blockBoundsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockCenter
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockCenter
.
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockBoundingBox
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockBoundingBox
.
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
->
getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
.
getDeviceBuffer());
// Set arguments for the neighbor list kernel.
// Set arguments for the neighbor list kernel.
...
@@ -6353,36 +6189,36 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
...
@@ -6353,36 +6189,36 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
setPeriodicBoxArgs(cl, neighborsKernel, index);
setPeriodicBoxArgs(cl, neighborsKernel, index);
index += 5;
index += 5;
neighborsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockCenter
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockCenter
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockBoundingBox
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockBoundingBox
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, neighborPairs
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, neighborPairs
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
.
getDeviceBuffer());
index++;
index++;
if (exclusions
!= NULL
) {
if (exclusions
.isInitialized()
) {
neighborsKernel.setArg<cl::Buffer>(index++, exclusions
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, exclusions
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, exclusionStartIndex
.
getDeviceBuffer());
}
}
// Set arguments for the kernel to find neighbor list start indices.
// Set arguments for the kernel to find neighbor list start indices.
index = 0;
index = 0;
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
->
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
.
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, neighborStartIndex
->
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, neighborStartIndex
.
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborPairs
->
getDeviceBuffer());
startIndicesKernel.setArg<cl::Buffer>(index++, numNeighborPairs
.
getDeviceBuffer());
// Set arguments for the kernel to assemble the final neighbor list.
// Set arguments for the kernel to assemble the final neighbor list.
index = 0;
index = 0;
copyPairsKernel.setArg<cl::Buffer>(index++, neighborPairs
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighborPairs
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighbors
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighbors
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborPairs
.
getDeviceBuffer());
index++;
index++;
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, numNeighborsForAtom
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighborStartIndex
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(index++, neighborStartIndex
.
getDeviceBuffer());
}
}
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
...
@@ -6391,7 +6227,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
...
@@ -6391,7 +6227,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
while (true) {
while (true) {
int* numPairs = (int*) cl.getPinnedBuffer();
int* numPairs = (int*) cl.getPinnedBuffer();
...
@@ -6406,7 +6242,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
...
@@ -6406,7 +6242,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
// We need to make sure there was enough memory for the neighbor list. Download the
// We need to make sure there was enough memory for the neighbor list. Download the
// information asynchronously so kernels can be running at the same time.
// information asynchronously so kernels can be running at the same time.
numNeighborPairs
->
download(numPairs, false);
numNeighborPairs
.
download(numPairs, false);
cl.getQueue().enqueueMarker(&event);
cl.getQueue().enqueueMarker(&event);
cl.executeKernel(startIndicesKernel, 256, 256);
cl.executeKernel(startIndicesKernel, 256, 256);
cl.executeKernel(copyPairsKernel, maxNeighborPairs);
cl.executeKernel(copyPairsKernel, maxNeighborPairs);
...
@@ -6420,17 +6256,13 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
...
@@ -6420,17 +6256,13 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
if (*numPairs > maxNeighborPairs) {
if (*numPairs > maxNeighborPairs) {
// Resize the arrays and run the calculation again.
// Resize the arrays and run the calculation again.
delete neighborPairs;
neighborPairs = NULL;
delete neighbors;
neighbors = NULL;
maxNeighborPairs = (int) (1.1*(*numPairs));
maxNeighborPairs = (int) (1.1*(*numPairs));
neighborPairs
= OpenCLArray::create<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs"
);
neighborPairs
.resize(maxNeighborPairs
);
neighbors
= OpenCLArray::create<int>(cl, maxNeighborPairs, "customManyParticleNeighbors"
);
neighbors
.resize(maxNeighborPairs
);
forceKernel.setArg<cl::Buffer>(8, neighbors
->
getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(8, neighbors
.
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, neighborPairs
->
getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, neighborPairs
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(0, neighborPairs
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(0, neighborPairs
.
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(1, neighbors
->
getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(1, neighbors
.
getDeviceBuffer());
continue;
continue;
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment