Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d0426ba9
"platforms/vscode:/vscode.git/clone" did not exist on "bf34e94737267a2c25ae9eb2386a71a72806fa33"
Commit
d0426ba9
authored
Jul 20, 2012
by
Peter Eastman
Browse files
Continuing converting AMOEBA to the new CUDA platform: starting on AmoebaMultipoleForce
parent
a99a2d0e
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
1242 additions
and
77 deletions
+1242
-77
platforms/cuda2/src/CudaArray.cpp
platforms/cuda2/src/CudaArray.cpp
+11
-0
platforms/cuda2/src/CudaArray.h
platforms/cuda2/src/CudaArray.h
+6
-0
platforms/cuda2/src/CudaContext.cpp
platforms/cuda2/src/CudaContext.cpp
+4
-0
platforms/cuda2/src/CudaContext.h
platforms/cuda2/src/CudaContext.h
+4
-0
platforms/cuda2/src/CudaKernels.cpp
platforms/cuda2/src/CudaKernels.cpp
+5
-5
platforms/cuda2/src/CudaNonbondedUtilities.cpp
platforms/cuda2/src/CudaNonbondedUtilities.cpp
+6
-5
platforms/cuda2/src/CudaNonbondedUtilities.h
platforms/cuda2/src/CudaNonbondedUtilities.h
+13
-1
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernelFactory.cpp
...ns/amoeba/platforms/cuda2/src/AmoebaCudaKernelFactory.cpp
+3
-3
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
+435
-62
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
+40
-1
plugins/amoeba/platforms/cuda2/src/kernels/multipoleFixedField.cu
...amoeba/platforms/cuda2/src/kernels/multipoleFixedField.cu
+361
-0
plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
+218
-0
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaPME.cpp
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaPME.cpp
+136
-0
No files found.
platforms/cuda2/src/CudaArray.cpp
View file @
d0426ba9
...
...
@@ -78,3 +78,14 @@ void CudaArray::download(void* data, bool blocking) const {
throw
OpenMMException
(
str
.
str
());
}
}
void
CudaArray
::
copyTo
(
CudaArray
&
dest
)
const
{
if
(
dest
.
getSize
()
!=
size
||
dest
.
getElementSize
()
!=
elementSize
)
throw
OpenMMException
(
"Error copying array "
+
name
+
" to "
+
dest
.
getName
()
+
": The destination array does not match the size of the array"
);
CUresult
result
=
cuMemcpyDtoDAsync
(
dest
.
getDevicePointer
(),
pointer
,
size
*
elementSize
,
0
);
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
str
<<
"Error copying array "
<<
name
<<
" to "
<<
dest
.
getName
()
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
}
platforms/cuda2/src/CudaArray.h
View file @
d0426ba9
...
...
@@ -127,6 +127,12 @@ public:
* the destination array must be in page-locked memory.
*/
void
download
(
void
*
data
,
bool
blocking
=
true
)
const
;
/**
* Copy the values in the device memory to a second array.
*
* @param dest the destination array to copy to
*/
void
copyTo
(
CudaArray
&
dest
)
const
;
private:
CudaContext
&
context
;
CUdeviceptr
pointer
;
...
...
platforms/cuda2/src/CudaContext.cpp
View file @
d0426ba9
...
...
@@ -468,6 +468,10 @@ void CudaContext::clearBuffer(CUdeviceptr memory, int size) {
executeKernel
(
clearBufferKernel
,
args
,
size
,
128
);
}
void
CudaContext
::
addAutoclearBuffer
(
CudaArray
&
array
)
{
addAutoclearBuffer
(
array
.
getDevicePointer
(),
array
.
getSize
()
*
array
.
getElementSize
());
}
void
CudaContext
::
addAutoclearBuffer
(
CUdeviceptr
memory
,
int
size
)
{
autoclearBuffers
.
push_back
(
memory
);
autoclearBufferSizes
.
push_back
(
size
/
4
);
...
...
platforms/cuda2/src/CudaContext.h
View file @
d0426ba9
...
...
@@ -231,6 +231,10 @@ public:
* @param size the size of the buffer in bytes
*/
void
clearBuffer
(
CUdeviceptr
memory
,
int
size
);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*/
void
addAutoclearBuffer
(
CudaArray
&
array
);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*
...
...
platforms/cuda2/src/CudaKernels.cpp
View file @
d0426ba9
...
...
@@ -1454,7 +1454,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
cu
.
addAutoclearBuffer
(
pmeGrid
->
getDevicePointer
(),
pmeGrid
->
getSize
()
*
2
*
elementSize
);
cu
.
addAutoclearBuffer
(
*
pmeGrid
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
...
...
@@ -1928,8 +1928,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
}
bornSum
=
CudaArray
::
create
<
long
long
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"bornSum"
);
bornForce
=
CudaArray
::
create
<
long
long
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"bornForce"
);
cu
.
addAutoclearBuffer
(
bornSum
->
getDevicePointer
(),
bornSum
->
getSize
()
*
sizeof
(
long
long
)
);
cu
.
addAutoclearBuffer
(
bornForce
->
getDevicePointer
(),
bornForce
->
getSize
()
*
sizeof
(
long
long
)
);
cu
.
addAutoclearBuffer
(
*
bornSum
);
cu
.
addAutoclearBuffer
(
*
bornForce
);
CudaArray
&
posq
=
cu
.
getPosq
();
float4
*
posqf
=
(
float4
*
)
cu
.
getPinnedBuffer
();
double4
*
posqd
=
(
double4
*
)
cu
.
getPinnedBuffer
();
...
...
@@ -2757,7 +2757,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
cu
.
getNonbondedUtilities
().
addArgument
(
arguments
[
i
]);
}
cu
.
addForce
(
new
CudaCustomGBForceInfo
(
force
));
cu
.
addAutoclearBuffer
(
longEnergyDerivs
->
getDevicePointer
(),
sizeof
(
long
long
)
*
longEnergyDerivs
->
getSize
()
);
cu
.
addAutoclearBuffer
(
*
longEnergyDerivs
);
}
double
CudaCalcCustomGBForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
@@ -2766,7 +2766,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
hasInitializedKernels
=
true
;
maxTiles
=
(
nb
.
getUseCutoff
()
?
nb
.
getInteractingTiles
().
getSize
()
:
cu
.
getNumAtomBlocks
()
*
(
cu
.
getNumAtomBlocks
()
+
1
)
/
2
);
valueBuffers
=
CudaArray
::
create
<
long
long
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"customGBValueBuffers"
);
cu
.
addAutoclearBuffer
(
valueBuffers
->
getDevicePointer
(),
sizeof
(
long
long
)
*
valueBuffers
->
getSize
()
);
cu
.
addAutoclearBuffer
(
*
valueBuffers
);
cu
.
clearBuffer
(
valueBuffers
->
getDevicePointer
(),
sizeof
(
long
long
)
*
valueBuffers
->
getSize
());
pairValueArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
pairValueArgs
.
push_back
(
&
cu
.
getNonbondedUtilities
().
getExclusions
().
getDevicePointer
());
...
...
platforms/cuda2/src/CudaNonbondedUtilities.cpp
View file @
d0426ba9
...
...
@@ -177,9 +177,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
// Record the exclusion data.
exclusions
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
tilesWithExclusions
.
size
()
*
CudaContext
::
TileSize
,
"exclusions"
);
vector
<
unsigned
int
>
exclusionVec
(
exclusions
->
getSize
());
for
(
int
i
=
0
;
i
<
exclusions
->
getSize
();
++
i
)
exclusionVec
[
i
]
=
0xFFFFFFFF
;
vector
<
unsigned
int
>
exclusionVec
(
exclusions
->
getSize
(),
0xFFFFFFFF
);
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
CudaContext
::
TileSize
;
int
offset1
=
atom1
-
x
*
CudaContext
::
TileSize
;
...
...
@@ -249,6 +247,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
// Create kernels.
if
(
kernelSource
.
size
()
>
0
)
forceKernel
=
createInteractionKernel
(
kernelSource
,
parameters
,
arguments
,
true
,
true
);
if
(
useCutoff
)
{
map
<
string
,
string
>
defines
;
...
...
@@ -291,6 +290,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
}
int
CudaNonbondedUtilities
::
findExclusionIndex
(
int
x
,
int
y
,
const
vector
<
unsigned
int
>&
exclusionIndices
,
const
vector
<
unsigned
int
>&
exclusionRowIndices
)
{
if
(
x
<
y
)
throw
OpenMMException
(
"Internal error: called findExclusionIndex with x<y"
);
int
start
=
exclusionRowIndices
[
x
];
int
end
=
exclusionRowIndices
[
x
+
1
];
for
(
int
i
=
start
;
i
<
end
;
i
++
)
...
...
@@ -317,7 +318,7 @@ void CudaNonbondedUtilities::prepareInteractions() {
}
void
CudaNonbondedUtilities
::
computeInteractions
()
{
if
(
cutoff
!=
-
1.
0
)
if
(
kernelSource
.
size
()
>
0
)
context
.
executeKernel
(
forceKernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
...
...
platforms/cuda2/src/CudaNonbondedUtilities.h
View file @
d0426ba9
...
...
@@ -232,8 +232,20 @@ public:
* @param isSymmetric specifies whether the interaction is symmetric
*/
CUfunction
createInteractionKernel
(
const
std
::
string
&
source
,
std
::
vector
<
ParameterInfo
>&
params
,
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
);
private:
/**
* This is a utility routine for locating data in the exclusions array. It takes the (x,y) indices of a tile,
* and returns the location in the array where the data for that tile begins.
*
* This routine requires that x >= y. If not, it will throw an exception.
*
* @param x the x index of the tile
* @param y the y index of the tile
* @param exclusionIndices the content of the exclusionIndices array
* @param exclusionRowIndices the content of the exclusionRowIndices array
* @return the index in the exclusions array at which the data for that tile begins
*/
static
int
findExclusionIndex
(
int
x
,
int
y
,
const
std
::
vector
<
unsigned
int
>&
exclusionIndices
,
const
std
::
vector
<
unsigned
int
>&
exclusionRowIndices
);
private:
CudaContext
&
context
;
CUfunction
forceKernel
;
CUfunction
findBlockBoundsKernel
;
...
...
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernelFactory.cpp
View file @
d0426ba9
...
...
@@ -97,9 +97,9 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
if
(
name
==
CalcAmoebaTorsionTorsionForceKernel
::
Name
())
return
new
CudaCalcAmoebaTorsionTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
//
if (name == CalcAmoebaMultipoleForceKernel::Name())
//
return new CudaCalcAmoebaMultipoleForceKernel(name, platform, cu, context.getSystem());
//
if
(
name
==
CalcAmoebaMultipoleForceKernel
::
Name
())
return
new
CudaCalcAmoebaMultipoleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
// if (name == CalcAmoebaGeneralizedKirkwoodForceKernel::Name())
// return new CudaCalcAmoebaGeneralizedKirkwoodForceKernel(name, platform, cu, context.getSystem());
...
...
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
View file @
d0426ba9
...
...
@@ -754,10 +754,10 @@ double CudaCalcAmoebaTorsionTorsionForceKernel::execute(ContextImpl& context, bo
return
0.0
;
}
//
/* -------------------------------------------------------------------------- *
//
* AmoebaMultipole *
//
* -------------------------------------------------------------------------- */
//
/* -------------------------------------------------------------------------- *
* AmoebaMultipole *
* -------------------------------------------------------------------------- */
//static void computeAmoebaMultipoleForce( CudaContext& cu ) {
//
// amoebaGpuContext gpu = data.getAmoebaGpu();
...
...
@@ -833,47 +833,330 @@ double CudaCalcAmoebaTorsionTorsionForceKernel::execute(ContextImpl& context, bo
// kCalculateAmoebaSystemMultipoleMoments( gpu, origin, outputMultipoleMonents );
//
//}
//
//class CudaCalcAmoebaMultipoleForceKernel::ForceInfo : public CudaForceInfo {
//public:
// ForceInfo(const AmoebaMultipoleForce& force) : force(force) {
class
CudaCalcAmoebaMultipoleForceKernel
::
ForceInfo
:
public
CudaForceInfo
{
public:
ForceInfo
(
const
AmoebaMultipoleForce
&
force
)
:
force
(
force
)
{
}
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
double
charge1
,
charge2
,
thole1
,
thole2
,
damping1
,
damping2
,
polarity1
,
polarity2
;
int
axis1
,
axis2
,
multipole11
,
multipole12
,
multipole21
,
multipole22
,
multipole31
,
multipole32
;
vector
<
double
>
dipole1
,
dipole2
,
quadrupole1
,
quadrupole2
;
force
.
getMultipoleParameters
(
particle1
,
charge1
,
dipole1
,
quadrupole1
,
axis1
,
multipole11
,
multipole21
,
multipole31
,
thole1
,
damping1
,
polarity1
);
force
.
getMultipoleParameters
(
particle2
,
charge2
,
dipole2
,
quadrupole2
,
axis2
,
multipole12
,
multipole22
,
multipole32
,
thole2
,
damping2
,
polarity2
);
if
(
charge1
!=
charge2
||
thole1
!=
thole2
||
damping1
!=
damping2
||
polarity1
!=
polarity2
||
axis1
!=
axis2
){
return
false
;
}
for
(
int
i
=
0
;
i
<
(
int
)
dipole1
.
size
();
++
i
){
if
(
dipole1
[
i
]
!=
dipole2
[
i
]){
return
false
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
quadrupole1
.
size
();
++
i
){
if
(
quadrupole1
[
i
]
!=
quadrupole2
[
i
]){
return
false
;
}
}
return
true
;
}
private:
const
AmoebaMultipoleForce
&
force
;
};
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
multipoleParticles
(
NULL
),
torqueBufferIndices
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
inducedDipolePolar
(
NULL
),
currentEpsilon
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
)
{
}
CudaCalcAmoebaMultipoleForceKernel
::~
CudaCalcAmoebaMultipoleForceKernel
()
{
cu
.
setAsCurrent
();
if
(
multipoleParticles
!=
NULL
)
delete
multipoleParticles
;
if
(
torqueBufferIndices
!=
NULL
)
delete
torqueBufferIndices
;
if
(
molecularDipoles
!=
NULL
)
delete
molecularDipoles
;
if
(
molecularQuadrupoles
!=
NULL
)
delete
molecularQuadrupoles
;
if
(
labFrameDipoles
!=
NULL
)
delete
labFrameDipoles
;
if
(
labFrameQuadrupoles
!=
NULL
)
delete
labFrameQuadrupoles
;
if
(
field
!=
NULL
)
delete
field
;
if
(
fieldPolar
!=
NULL
)
delete
fieldPolar
;
if
(
dampingAndThole
!=
NULL
)
delete
dampingAndThole
;
if
(
inducedDipole
!=
NULL
)
delete
inducedDipole
;
if
(
inducedDipolePolar
!=
NULL
)
delete
inducedDipolePolar
;
if
(
currentEpsilon
!=
NULL
)
delete
currentEpsilon
;
if
(
polarizability
!=
NULL
)
delete
polarizability
;
if
(
covalentFlags
!=
NULL
)
delete
covalentFlags
;
if
(
polarizationGroupFlags
!=
NULL
)
delete
polarizationGroupFlags
;
}
void
CudaCalcAmoebaMultipoleForceKernel
::
initialize
(
const
System
&
system
,
const
AmoebaMultipoleForce
&
force
)
{
cu
.
setAsCurrent
();
// Initialize multipole parameters.
numMultipoles
=
force
.
getNumMultipoles
();
CudaArray
&
posq
=
cu
.
getPosq
();
float4
*
posqf
=
(
float4
*
)
cu
.
getPinnedBuffer
();
double4
*
posqd
=
(
double4
*
)
cu
.
getPinnedBuffer
();
vector
<
float2
>
dampingAndTholeVec
;
vector
<
float
>
polarizabilityVec
;
vector
<
float
>
molecularDipolesVec
;
vector
<
float
>
molecularQuadrupolesVec
;
vector
<
int4
>
multipoleParticlesVec
;
for
(
int
i
=
0
;
i
<
numMultipoles
;
i
++
)
{
double
charge
,
thole
,
damping
,
polarity
;
int
axisType
,
atomX
,
atomY
,
atomZ
;
vector
<
double
>
dipole
,
quadrupole
;
force
.
getMultipoleParameters
(
i
,
charge
,
dipole
,
quadrupole
,
axisType
,
atomZ
,
atomX
,
atomY
,
thole
,
damping
,
polarity
);
if
(
cu
.
getUseDoublePrecision
())
posqd
[
i
]
=
make_double4
(
0
,
0
,
0
,
charge
);
else
posqf
[
i
]
=
make_float4
(
0
,
0
,
0
,
(
float
)
charge
);
dampingAndTholeVec
.
push_back
(
make_float2
((
float
)
damping
,
(
float
)
thole
));
polarizabilityVec
.
push_back
((
float
)
polarity
);
multipoleParticlesVec
.
push_back
(
make_int4
(
atomX
,
atomY
,
atomZ
,
axisType
));
for
(
int
j
=
0
;
j
<
3
;
j
++
)
molecularDipolesVec
.
push_back
((
float
)
dipole
[
j
]);
for
(
int
j
=
0
;
j
<
9
;
j
++
)
molecularQuadrupolesVec
.
push_back
((
float
)
quadrupole
[
j
]);
}
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
for
(
int
i
=
numMultipoles
;
i
<
paddedNumAtoms
;
i
++
)
{
dampingAndTholeVec
.
push_back
(
make_float2
(
0
,
0
));
polarizabilityVec
.
push_back
(
0
);
multipoleParticlesVec
.
push_back
(
make_int4
(
0
,
0
,
0
,
0
));
for
(
int
j
=
0
;
j
<
3
;
j
++
)
molecularDipolesVec
.
push_back
(
0
);
for
(
int
j
=
0
;
j
<
9
;
j
++
)
molecularQuadrupolesVec
.
push_back
(
0
);
}
dampingAndThole
=
CudaArray
::
create
<
float2
>
(
cu
,
paddedNumAtoms
,
"dampingAndThole"
);
polarizability
=
CudaArray
::
create
<
float
>
(
cu
,
paddedNumAtoms
,
"polarizability"
);
multipoleParticles
=
CudaArray
::
create
<
int4
>
(
cu
,
paddedNumAtoms
,
"multipoleParticles"
);
molecularDipoles
=
CudaArray
::
create
<
float
>
(
cu
,
3
*
paddedNumAtoms
,
"molecularDipoles"
);
molecularQuadrupoles
=
CudaArray
::
create
<
float
>
(
cu
,
9
*
paddedNumAtoms
,
"molecularQuadrupoles"
);
dampingAndThole
->
upload
(
dampingAndTholeVec
);
polarizability
->
upload
(
polarizabilityVec
);
multipoleParticles
->
upload
(
multipoleParticlesVec
);
molecularDipoles
->
upload
(
molecularDipolesVec
);
molecularQuadrupoles
->
upload
(
molecularQuadrupolesVec
);
posq
.
upload
(
cu
.
getPinnedBuffer
());
// Create workspace arrays.
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
labFrameDipoles
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"labFrameDipoles"
);
labFrameQuadrupoles
=
new
CudaArray
(
cu
,
9
*
paddedNumAtoms
,
elementSize
,
"labFrameQuadrupoles"
);
field
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"field"
);
fieldPolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"fieldPolar"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
// Record which atoms should be flagged as exclusions based on covalent groups, and determine
// the values for the covalent group flags.
vector
<
vector
<
int
>
>
exclusions
(
numMultipoles
);
for
(
int
i
=
0
;
i
<
numMultipoles
;
i
++
)
{
vector
<
int
>
atoms
;
set
<
int
>
allAtoms
;
allAtoms
.
insert
(
i
);
force
.
getCovalentMap
(
i
,
AmoebaMultipoleForce
::
Covalent12
,
atoms
);
allAtoms
.
insert
(
atoms
.
begin
(),
atoms
.
end
());
force
.
getCovalentMap
(
i
,
AmoebaMultipoleForce
::
Covalent13
,
atoms
);
allAtoms
.
insert
(
atoms
.
begin
(),
atoms
.
end
());
for
(
set
<
int
>::
const_iterator
iter
=
allAtoms
.
begin
();
iter
!=
allAtoms
.
end
();
++
iter
)
covalentFlagValues
.
push_back
(
make_int3
(
i
,
*
iter
,
0
));
force
.
getCovalentMap
(
i
,
AmoebaMultipoleForce
::
Covalent14
,
atoms
);
allAtoms
.
insert
(
atoms
.
begin
(),
atoms
.
end
());
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
covalentFlagValues
.
push_back
(
make_int3
(
i
,
atoms
[
j
],
1
));
force
.
getCovalentMap
(
i
,
AmoebaMultipoleForce
::
Covalent15
,
atoms
);
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
covalentFlagValues
.
push_back
(
make_int3
(
i
,
atoms
[
j
],
2
));
allAtoms
.
insert
(
atoms
.
begin
(),
atoms
.
end
());
force
.
getCovalentMap
(
i
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
atoms
);
allAtoms
.
insert
(
atoms
.
begin
(),
atoms
.
end
());
exclusions
[
i
].
insert
(
exclusions
[
i
].
end
(),
allAtoms
.
begin
(),
allAtoms
.
end
());
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
polarizationFlagValues
.
push_back
(
make_int2
(
i
,
atoms
[
j
]));
}
// Create the kernels.
// Create the other kernels.
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
numMultipoles
);
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"SCALING_DISTANCE_CUTOFF"
]
=
cu
.
doubleToString
(
50.0
);
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
defines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoles
,
defines
);
computeMomentsKernel
=
cu
.
getKernel
(
module
,
"computeLabFrameMoments"
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleFixedField
,
defines
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
// Set up PME.
bool
usePME
=
(
force
.
getNonbondedMethod
()
==
AmoebaMultipoleForce
::
PME
);
// map<string, string> defines;
// alpha = 0;
// if (usePME) {
// // Compute the PME parameters.
//
// int gridSizeX, gridSizeY, gridSizeZ;
// NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
// gridSizeX = findFFTDimension(gridSizeX);
// gridSizeY = findFFTDimension(gridSizeY);
// gridSizeZ = findFFTDimension(gridSizeZ);
// defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
// defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
// defines["USE_EWALD"] = "1";
// pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
// pmeDefines["NUM_ATOMS"] = cu.intToString(numMultipoles);
// pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha));
// pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX);
// pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
// pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
// pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
// pmeDefines["M_PI"] = cu.doubleToString(M_PI);
// if (cu.getUseDoublePrecision())
// pmeDefines["USE_DOUBLE_PRECISION"] = "1";
// CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
// pmeUpdateBsplinesKernel = cu.getKernel(module, "updateBsplines");
// pmeAtomRangeKernel = cu.getKernel(module, "findAtomRangeForGrid");
// pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
// pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
// pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce");
// pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
// cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
//
// // Create required data structures.
//
// int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
// pmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
// cu.addAutoclearBuffer(*pmeGrid);
// pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
// pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
// pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
// pmeBsplineTheta = new CudaArray(cu, PmeOrder*numMultipoles, 4*elementSize, "pmeBsplineTheta");
// pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
// pmeAtomGridIndex = CudaArray::create<int2>(cu, numMultipoles, "pmeAtomGridIndex");
// sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
// cufftResult result = cufftPlan3d(&fft, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2Z : CUFFT_C2C);
// if (result != CUFFT_SUCCESS)
// throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
// hasInitializedFFT = true;
//
// // Initialize the b-spline moduli.
//
// int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
// vector<double> data(PmeOrder);
// vector<double> ddata(PmeOrder);
// vector<double> bsplines_data(maxSize);
// data[PmeOrder-1] = 0.0;
// data[1] = 0.0;
// data[0] = 1.0;
// for (int i = 3; i < PmeOrder; i++) {
// double div = 1.0/(i-1.0);
// data[i-1] = 0.0;
// for (int j = 1; j < (i-1); j++)
// data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
// data[0] = div*data[0];
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// double charge1, charge2, thole1, thole2, damping1, damping2, polarity1, polarity2;
// int axis1, axis2, multipole11, multipole12, multipole21, multipole22, multipole31, multipole32;
// vector<double> dipole1, dipole2, quadrupole1, quadrupole2;
// force.getMultipoleParameters(particle1, charge1, dipole1, quadrupole1, axis1, multipole11, multipole21, multipole31, thole1, damping1, polarity1);
// force.getMultipoleParameters(particle2, charge2, dipole2, quadrupole2, axis2, multipole12, multipole22, multipole32, thole2, damping2, polarity2);
// if (charge1 != charge2 || thole1 != thole2 || damping1 != damping2 || polarity1 != polarity2 || axis1 != axis2){
// return false;
//
// // Differentiate.
//
// ddata[0] = -data[0];
// for (int i = 1; i < PmeOrder; i++)
// ddata[i] = data[i-1]-data[i];
// double div = 1.0/(PmeOrder-1);
// data[PmeOrder-1] = 0.0;
// for (int i = 1; i < (PmeOrder-1); i++)
// data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
// data[0] = div*data[0];
// for (int i = 0; i < maxSize; i++)
// bsplines_data[i] = 0.0;
// for (int i = 1; i <= PmeOrder; i++)
// bsplines_data[i] = data[i-1];
//
// // Evaluate the actual bspline moduli for X/Y/Z.
//
// for(int dim = 0; dim < 3; dim++) {
// int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
// vector<double> moduli(ndata);
// for (int i = 0; i < ndata; i++) {
// double sc = 0.0;
// double ss = 0.0;
// for (int j = 0; j < ndata; j++) {
// double arg = (2.0*M_PI*i*j)/ndata;
// sc += bsplines_data[j]*cos(arg);
// ss += bsplines_data[j]*sin(arg);
// }
// for (int i = 0; i < (int) dipole1.size(); ++i){
// if (dipole1[i] != dipole2[i]){
// return false;
// moduli[i] = sc*sc+ss*ss;
// }
// for (int i = 0; i < ndata; i++)
// if (moduli[i] < 1.0e-7)
// moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
// if (cu.getUseDoublePrecision()) {
// if (dim == 0)
// pmeBsplineModuliX->upload(moduli);
// else if (dim == 1)
// pmeBsplineModuliY->upload(moduli);
// else
// pmeBsplineModuliZ->upload(moduli);
// }
// for (int i = 0; i < (int) quadrupole1.size(); ++i){
// if (quadrupole1[i] != quadrupole2[i]){
// return false;
// else {
// vector<float> modulif(ndata);
// for (int i = 0; i < ndata; i++)
// modulif[i] = (float) moduli[i];
// if (dim == 0)
// pmeBsplineModuliX->upload(modulif);
// else if (dim == 1)
// pmeBsplineModuliY->upload(modulif);
// else
// pmeBsplineModuliZ->upload(modulif);
// }
// }
// return true;
// }
//private:
// const AmoebaMultipoleForce& force;
//};
//
//CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
// CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system) {
// data.incrementKernelCount();
//}
//
//CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
// data.decrementKernelCount();
//}
//
//void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) {
//
// Add an interaction to the default nonbonded kernel. This doesn't actually do any calculations. It's
// just so that CudaNonbondedUtilities will build the exclusion flags and maintain the neighbor list.
cu
.
getNonbondedUtilities
().
addInteraction
(
usePME
,
usePME
,
true
,
force
.
getCutoffDistance
(),
exclusions
,
""
,
force
.
getForceGroup
());
cu
.
addForce
(
new
ForceInfo
(
force
));
// numMultipoles = force.getNumMultipoles();
//
// data.setHasAmoebaMultipole( true );
...
...
@@ -1036,25 +1319,116 @@ double CudaCalcAmoebaTorsionTorsionForceKernel::execute(ContextImpl& context, bo
// gpu->sim.nonbondedMethod = PARTICLE_MESH_EWALD;
// }
// data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
//}
//
//double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// computeAmoebaMultipoleForce( data );
// return 0.0;
//}
//
//void CudaCalcAmoebaMultipoleForceKernel::getElectrostaticPotential(ContextImpl& context, const std::vector< Vec3 >& inputGrid,
// std::vector< double >& outputElectrostaticPotential) {
}
void
CudaCalcAmoebaMultipoleForceKernel
::
initializeScaleFactors
()
{
hasInitializedScaleFactors
=
true
;
CudaNonbondedUtilities
&
nb
=
cu
.
getNonbondedUtilities
();
// Figure out the covalent flag values to use for each atom pair.
vector
<
unsigned
int
>
exclusionIndices
;
vector
<
unsigned
int
>
exclusionRowIndices
;
nb
.
getExclusionIndices
().
download
(
exclusionIndices
);
nb
.
getExclusionRowIndices
().
download
(
exclusionRowIndices
);
covalentFlags
=
CudaArray
::
create
<
uint2
>
(
cu
,
nb
.
getExclusions
().
getSize
(),
"covalentFlags"
);
vector
<
uint2
>
covalentFlagsVec
(
nb
.
getExclusions
().
getSize
(),
make_uint2
(
0
,
0
));
for
(
int
i
=
0
;
i
<
(
int
)
covalentFlagValues
.
size
();
i
++
)
{
int
atom1
=
covalentFlagValues
[
i
].
x
;
int
atom2
=
covalentFlagValues
[
i
].
y
;
int
value
=
covalentFlagValues
[
i
].
z
;
int
x
=
atom1
/
CudaContext
::
TileSize
;
int
offset1
=
atom1
-
x
*
CudaContext
::
TileSize
;
int
y
=
atom2
/
CudaContext
::
TileSize
;
int
offset2
=
atom2
-
y
*
CudaContext
::
TileSize
;
int
f1
=
(
value
==
0
||
value
==
1
?
1
:
0
);
int
f2
=
(
value
==
0
||
value
==
2
?
1
:
0
);
if
(
x
>
y
)
{
int
index
=
CudaNonbondedUtilities
::
findExclusionIndex
(
x
,
y
,
exclusionIndices
,
exclusionRowIndices
);
covalentFlagsVec
[
index
+
offset1
].
x
|=
f1
<<
offset2
;
covalentFlagsVec
[
index
+
offset1
].
y
|=
f2
<<
offset2
;
}
else
{
int
index
=
CudaNonbondedUtilities
::
findExclusionIndex
(
y
,
x
,
exclusionIndices
,
exclusionRowIndices
);
covalentFlagsVec
[
index
+
offset2
].
x
|=
f1
<<
offset1
;
covalentFlagsVec
[
index
+
offset2
].
y
|=
f2
<<
offset1
;
}
}
covalentFlags
->
upload
(
covalentFlagsVec
);
// Do the same for the polarization flags.
polarizationGroupFlags
=
CudaArray
::
create
<
unsigned
int
>
(
cu
,
nb
.
getExclusions
().
getSize
(),
"polarizationGroupFlags"
);
vector
<
unsigned
int
>
polarizationGroupFlagsVec
(
nb
.
getExclusions
().
getSize
(),
0
);
for
(
int
i
=
0
;
i
<
(
int
)
polarizationFlagValues
.
size
();
i
++
)
{
int
atom1
=
polarizationFlagValues
[
i
].
x
;
int
atom2
=
polarizationFlagValues
[
i
].
y
;
int
x
=
atom1
/
CudaContext
::
TileSize
;
int
offset1
=
atom1
-
x
*
CudaContext
::
TileSize
;
int
y
=
atom2
/
CudaContext
::
TileSize
;
int
offset2
=
atom2
-
y
*
CudaContext
::
TileSize
;
if
(
x
>
y
)
{
int
index
=
CudaNonbondedUtilities
::
findExclusionIndex
(
x
,
y
,
exclusionIndices
,
exclusionRowIndices
);
polarizationGroupFlagsVec
[
index
+
offset1
]
|=
1
<<
offset2
;
}
else
{
int
index
=
CudaNonbondedUtilities
::
findExclusionIndex
(
y
,
x
,
exclusionIndices
,
exclusionRowIndices
);
polarizationGroupFlagsVec
[
index
+
offset2
]
|=
1
<<
offset1
;
}
}
polarizationGroupFlags
->
upload
(
polarizationGroupFlagsVec
);
}
double
CudaCalcAmoebaMultipoleForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
if
(
!
hasInitializedScaleFactors
)
initializeScaleFactors
();
CudaNonbondedUtilities
&
nb
=
cu
.
getNonbondedUtilities
();
// Compute the lab frame moments.
void
*
computeMomentsArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
multipoleParticles
->
getDevicePointer
(),
&
molecularDipoles
->
getDevicePointer
(),
&
molecularQuadrupoles
->
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
()};
cu
.
executeKernel
(
computeMomentsKernel
,
computeMomentsArgs
,
cu
.
getPaddedNumAtoms
());
vector
<
float
>
d
,
q
;
labFrameDipoles
->
download
(
d
);
labFrameQuadrupoles
->
download
(
q
);
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
();
i
++
)
printf
(
"%d %g %g %g
\n
"
,
i
,
d
[
3
*
i
],
d
[
3
*
i
+
1
],
d
[
3
*
i
+
2
]);
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
();
i
++
)
printf
(
"%d %g %g %g %g %g %g %g %g %g
\n
"
,
i
,
q
[
9
*
i
],
q
[
9
*
i
+
1
],
q
[
9
*
i
+
2
],
q
[
9
*
i
+
3
],
q
[
9
*
i
+
4
],
q
[
9
*
i
+
5
],
q
[
9
*
i
+
6
],
q
[
9
*
i
+
7
],
q
[
9
*
i
+
8
]);
int
startTileIndex
=
nb
.
getStartTileIndex
();
int
numTileIndices
=
nb
.
getNumTiles
();
int
numForceThreadBlocks
=
nb
.
getNumForceThreadBlocks
();
int
forceThreadBlockSize
=
nb
.
getForceThreadBlockSize
();
if
(
pmeGrid
==
NULL
)
{
void
*
computeFixedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusions
().
getDevicePointer
(),
&
nb
.
getExclusionIndices
().
getDevicePointer
(),
&
nb
.
getExclusionRowIndices
().
getDevicePointer
(),
&
covalentFlags
->
getDevicePointer
(),
&
polarizationGroupFlags
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeFixedFieldKernel
,
computeFixedFieldArgs
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
vector
<
unsigned
long
long
>
f
;
field
->
download
(
f
);
int
pad
=
cu
.
getPaddedNumAtoms
();
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
();
i
++
)
{
printf
(
"%d %g %g %g
\n
"
,
i
,
f
[
i
]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
pad
]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
pad
*
2
]
/
(
double
)
0xFFFFFFFF
);
}
}
return
0.0
;
}
void
CudaCalcAmoebaMultipoleForceKernel
::
getElectrostaticPotential
(
ContextImpl
&
context
,
const
std
::
vector
<
Vec3
>&
inputGrid
,
std
::
vector
<
double
>&
outputElectrostaticPotential
)
{
// computeAmoebaMultipolePotential( data, inputGrid, outputElectrostaticPotential );
//
return;
//
}
//
//
void CudaCalcAmoebaMultipoleForceKernel::getSystemMultipoleMoments(ContextImpl& context, const Vec3& origin,
//
std::vector< double >& outputMultipoleMonents) {
return
;
}
void
CudaCalcAmoebaMultipoleForceKernel
::
getSystemMultipoleMoments
(
ContextImpl
&
context
,
const
Vec3
&
origin
,
std
::
vector
<
double
>&
outputMultipoleMonents
)
{
// computeAmoebaSystemMultipoleMoments( data, origin, outputMultipoleMonents);
//
return;
//
}
//
return
;
}
///* -------------------------------------------------------------------------- *
// * AmoebaGeneralizedKirkwood *
// * -------------------------------------------------------------------------- */
...
...
@@ -1264,9 +1638,8 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF
hasInitializedNonbonded
=
true
;
nonbonded
->
initialize
(
system
);
}
const
char
*
errorMessage
=
"Error copying array"
;
CHECK_RESULT
(
cuMemcpyDtoDAsync
(
tempPosq
->
getDevicePointer
(),
cu
.
getPosq
().
getDevicePointer
(),
tempPosq
->
getSize
()
*
tempPosq
->
getElementSize
(),
0
));
CHECK_RESULT
(
cuMemcpyDtoDAsync
(
tempForces
->
getDevicePointer
(),
cu
.
getForce
().
getDevicePointer
(),
tempForces
->
getSize
()
*
tempForces
->
getElementSize
(),
0
));
cu
.
getPosq
().
copyTo
(
*
tempPosq
);
cu
.
getForce
().
copyTo
(
*
tempForces
);
void
*
prepareArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
tempPosq
->
getDevicePointer
(),
&
bondReductionAtoms
->
getDevicePointer
(),
&
bondReductionFactors
->
getDevicePointer
()};
cu
.
executeKernel
(
prepareKernel
,
prepareArgs
,
cu
.
getPaddedNumAtoms
());
...
...
@@ -1274,8 +1647,8 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF
nonbonded
->
computeInteractions
();
void
*
spreadArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
tempForces
->
getDevicePointer
(),
&
bondReductionAtoms
->
getDevicePointer
(),
&
bondReductionFactors
->
getDevicePointer
()};
cu
.
executeKernel
(
spreadKernel
,
spreadArgs
,
cu
.
getPaddedNumAtoms
());
CHECK_RESULT
(
cuMemcpyDtoDAsync
(
cu
.
getPosq
().
getDevicePointer
(),
tempPosq
->
getDevicePointer
(),
tempPosq
->
getSize
()
*
tempPosq
->
getElementSize
(),
0
));
CHECK_RESULT
(
cuMemcpyDtoDAsync
(
cu
.
getForce
().
getDevicePointer
(),
tempForces
->
getDevicePointer
(),
tempForces
->
getSize
()
*
tempForces
->
getElementSize
(),
0
));
tempPosq
->
copyTo
(
cu
.
getPosq
(
));
tempForces
->
copyTo
(
cu
.
getForce
(
));
return
0.0
;
}
...
...
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
View file @
d0426ba9
...
...
@@ -30,8 +30,10 @@
#include "openmm/amoebaKernels.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
#include "CudaContext.h"
#include "CudaArray.h"
#include "CudaContext.h"
#include "CudaSort.h"
#include <cufft.h>
namespace
OpenMM
{
...
...
@@ -371,9 +373,46 @@ public:
private:
class
ForceInfo
;
void
initializeScaleFactors
();
int
numMultipoles
;
bool
hasInitializedScaleFactors
;
CudaContext
&
cu
;
System
&
system
;
std
::
vector
<
int3
>
covalentFlagValues
;
std
::
vector
<
int2
>
polarizationFlagValues
;
CudaArray
*
multipoleParticles
;
CudaArray
*
torqueBufferIndices
;
CudaArray
*
molecularDipoles
;
CudaArray
*
molecularQuadrupoles
;
CudaArray
*
labFrameDipoles
;
CudaArray
*
labFrameQuadrupoles
;
CudaArray
*
field
;
CudaArray
*
fieldPolar
;
CudaArray
*
dampingAndThole
;
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipolePolar
;
CudaArray
*
currentEpsilon
;
CudaArray
*
polarizability
;
CudaArray
*
covalentFlags
;
CudaArray
*
polarizationGroupFlags
;
CudaArray
*
pmeGrid
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeTheta1
;
CudaArray
*
pmeTheta2
;
CudaArray
*
pmeIgrid
;
CudaArray
*
pmePhi
;
CudaArray
*
pmePhid
;
CudaArray
*
pmePhip
;
CudaArray
*
pmePhidp
;
CudaArray
*
pmeBsplineTheta
;
CudaArray
*
pmeBsplineDTheta
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaSort
*
sort
;
cufftHandle
fft
;
CUfunction
computeMomentsKernel
,
computeFixedFieldKernel
;
};
/**
...
...
plugins/amoeba/platforms/cuda2/src/kernels/multipoleFixedField.cu
0 → 100644
View file @
d0426ba9
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef
struct
{
real4
posq
;
real3
field
,
fieldPolar
,
dipole
;
real
quadrupoleXX
,
quadrupoleXY
,
quadrupoleXZ
;
real
quadrupoleYY
,
quadrupoleYZ
,
quadrupoleZZ
;
float
thole
,
damp
;
}
AtomData
;
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
float2
*
__restrict__
dampingAndThole
)
{
data
.
posq
=
posq
[
atom
];
data
.
dipole
.
x
=
labFrameDipole
[
atom
*
3
];
data
.
dipole
.
y
=
labFrameDipole
[
atom
*
3
+
1
];
data
.
dipole
.
z
=
labFrameDipole
[
atom
*
3
+
2
];
data
.
quadrupoleXX
=
labFrameQuadrupole
[
atom
*
9
];
data
.
quadrupoleXY
=
labFrameQuadrupole
[
atom
*
9
+
1
];
data
.
quadrupoleXZ
=
labFrameQuadrupole
[
atom
*
9
+
2
];
data
.
quadrupoleYY
=
labFrameQuadrupole
[
atom
*
9
+
4
];
data
.
quadrupoleYZ
=
labFrameQuadrupole
[
atom
*
9
+
5
];
data
.
quadrupoleZZ
=
labFrameQuadrupole
[
atom
*
9
+
8
];
float2
temp
=
dampingAndThole
[
atom
];
data
.
damp
=
temp
.
x
;
data
.
thole
=
temp
.
y
;
}
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
real3
deltaR
,
real3
&
field1
,
real3
&
field2
)
{
real
rI
=
RSQRT
(
dot
(
deltaR
,
deltaR
));
real
r
=
RECIP
(
rI
);
real
r2I
=
rI
*
rI
;
real
rr3
=
rI
*
r2I
;
real
rr5
=
3
*
rr3
*
r2I
;
real
rr7
=
5
*
rr5
*
r2I
;
// get scaling factors, if needed
float
damp
=
atom1
.
damp
*
atom2
.
damp
;
real
dampExp
;
if
(
damp
!=
0
&&
r
<
SCALING_DISTANCE_CUTOFF
)
{
// get scaling factors
real
ratio
=
r
/
damp
;
float
pGamma
=
atom2
.
thole
>
atom1
.
thole
?
atom1
.
thole
:
atom2
.
thole
;
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
dampExp
=
EXP
(
-
damp
);
}
else
dampExp
=
0
;
rr3
*=
1
-
dampExp
;
rr5
*=
1
-
(
1
+
damp
)
*
dampExp
;
rr7
*=
1
-
(
1
+
damp
+
(
0.6
f
*
damp
*
damp
))
*
dampExp
;
real
rr5_2
=
2
*
rr5
;
real3
qDotDelta
;
qDotDelta
.
x
=
deltaR
.
x
*
atom2
.
quadrupoleXX
+
deltaR
.
y
*
atom2
.
quadrupoleXY
+
deltaR
.
z
*
atom2
.
quadrupoleXZ
;
qDotDelta
.
y
=
deltaR
.
x
*
atom2
.
quadrupoleXY
+
deltaR
.
y
*
atom2
.
quadrupoleYY
+
deltaR
.
z
*
atom2
.
quadrupoleYZ
;
qDotDelta
.
z
=
deltaR
.
x
*
atom2
.
quadrupoleXZ
+
deltaR
.
y
*
atom2
.
quadrupoleYZ
+
deltaR
.
z
*
atom2
.
quadrupoleZZ
;
real
dotdd
=
dot
(
deltaR
,
atom2
.
dipole
);
real
dotqd
=
dot
(
deltaR
,
qDotDelta
);
real
factor
=
-
rr3
*
atom2
.
posq
.
w
+
rr5
*
dotdd
-
rr7
*
dotqd
;
field1
=
deltaR
*
factor
-
rr3
*
atom2
.
dipole
+
rr5_2
*
qDotDelta
;
qDotDelta
.
x
=
deltaR
.
x
*
atom1
.
quadrupoleXX
+
deltaR
.
y
*
atom1
.
quadrupoleXY
+
deltaR
.
z
*
atom1
.
quadrupoleXZ
;
qDotDelta
.
y
=
deltaR
.
x
*
atom1
.
quadrupoleXY
+
deltaR
.
y
*
atom1
.
quadrupoleYY
+
deltaR
.
z
*
atom1
.
quadrupoleYZ
;
qDotDelta
.
z
=
deltaR
.
x
*
atom1
.
quadrupoleXZ
+
deltaR
.
y
*
atom1
.
quadrupoleYZ
+
deltaR
.
z
*
atom1
.
quadrupoleZZ
;
dotdd
=
dot
(
deltaR
,
atom1
.
dipole
);
dotqd
=
dot
(
deltaR
,
qDotDelta
);
factor
=
rr3
*
atom1
.
posq
.
w
+
rr5
*
dotdd
+
rr7
*
dotqd
;
field2
=
deltaR
*
factor
-
rr3
*
atom1
.
dipole
-
rr5_2
*
qDotDelta
;
}
__device__
real
computeDScaleFactor
(
unsigned
int
polarizationGroup
)
{
return
(
polarizationGroup
&
1
?
0
:
1
);
}
__device__
float
computeDScaleFactor
(
uint2
covalent
)
{
// int f1 = (value == 0 || value == 1 ? 1 : 0);
// int f2 = (value == 0 || value == 2 ? 1 : 0);
// 0 = 12 or 13: x and y: 0
// 1 = 14: x: 0.4
// 2 = 15: y: 0.8
bool
x
=
(
covalent
.
x
&
1
);
bool
y
=
(
covalent
.
y
&
1
);
return
(
x
?
(
y
?
0.0
f
:
0.4
f
)
:
(
y
?
0.8
f
:
1.0
f
));
}
__device__
float
computePScaleFactor
(
uint2
covalent
,
unsigned
int
polarizationGroup
)
{
bool
x
=
(
covalent
.
x
&
1
);
bool
y
=
(
covalent
.
y
&
1
);
bool
p
=
(
polarizationGroup
&
1
);
return
(
x
&&
y
?
0.0
f
:
(
x
&&
p
?
0.5
f
:
1.0
f
));
}
/**
* Compute nonbonded interactions.
*/
extern
"C"
__global__
void
computeFixedField
(
unsigned
long
long
*
__restrict__
fieldBuffers
,
unsigned
long
long
*
__restrict__
fieldPolarBuffers
,
const
real4
*
__restrict__
posq
,
const
unsigned
int
*
__restrict__
exclusions
,
const
unsigned
int
*
__restrict__
exclusionIndices
,
const
unsigned
int
*
__restrict__
exclusionRowIndices
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
const
ushort2
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
unsigned
int
*
__restrict__
interactionFlags
,
#endif
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
float2
*
__restrict__
dampingAndThole
)
{
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
numTileIndices
/
totalWarps
:
warp
*
numTiles
/
totalWarps
);
unsigned
int
end
=
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
numTiles
/
totalWarps
);
#else
const
unsigned
int
numTiles
=
numTileIndices
;
unsigned
int
pos
=
startTileIndex
+
warp
*
numTiles
/
totalWarps
;
unsigned
int
end
=
startTileIndex
+
(
warp
+
1
)
*
numTiles
/
totalWarps
;
#endif
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
unsigned
int
exclusionRange
[
2
*
WARPS_PER_GROUP
];
__shared__
int
exclusionIndex
[
WARPS_PER_GROUP
];
#ifndef ENABLE_SHUFFLE
__shared__
real
tempBuffer
[
3
*
THREAD_BLOCK_SIZE
];
#endif
do
{
// Extract the coordinates of this tile
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
unsigned
int
x
,
y
;
AtomData
data
;
data
.
field
=
make_real3
(
0
);
data
.
fieldPolar
=
make_real3
(
0
);
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles
[
pos
];
x
=
tileIndices
.
x
;
y
=
tileIndices
.
y
;
}
else
#endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
}
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
loadAtomData
(
data
,
atom1
,
posq
,
labFrameDipole
,
labFrameQuadrupole
,
dampingAndThole
);
// Locate the exclusion data for this tile.
if
(
tgx
<
2
)
exclusionRange
[
2
*
localGroupIndex
+
tgx
]
=
exclusionRowIndices
[
x
+
tgx
];
if
(
tgx
==
0
)
exclusionIndex
[
localGroupIndex
]
=
-
1
;
for
(
unsigned
int
i
=
exclusionRange
[
2
*
localGroupIndex
]
+
tgx
;
i
<
exclusionRange
[
2
*
localGroupIndex
+
1
];
i
+=
TILE_SIZE
)
if
(
exclusionIndices
[
i
]
==
y
)
exclusionIndex
[
localGroupIndex
]
=
i
*
TILE_SIZE
;
bool
hasExclusions
=
(
exclusionIndex
[
localGroupIndex
]
>
-
1
);
if
(
pos
>=
end
)
;
// This warp is done.
else
if
(
x
==
y
)
{
// This tile is on the diagonal.
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
localData
[
localAtomIndex
].
posq
=
data
.
posq
;
localData
[
localAtomIndex
].
dipole
=
data
.
dipole
;
localData
[
localAtomIndex
].
quadrupoleXX
=
data
.
quadrupoleXX
;
localData
[
localAtomIndex
].
quadrupoleXY
=
data
.
quadrupoleXY
;
localData
[
localAtomIndex
].
quadrupoleXZ
=
data
.
quadrupoleXZ
;
localData
[
localAtomIndex
].
quadrupoleYY
=
data
.
quadrupoleYY
;
localData
[
localAtomIndex
].
quadrupoleYZ
=
data
.
quadrupoleYZ
;
localData
[
localAtomIndex
].
quadrupoleZZ
=
data
.
quadrupoleZZ
;
localData
[
localAtomIndex
].
thole
=
data
.
thole
;
// IS THIS CORRECT?
localData
[
localAtomIndex
].
damp
=
data
.
damp
;
// IS THIS CORRECT?
unsigned
int
excl
=
exclusions
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
uint2
covalent
=
covalentFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
unsigned
int
polarizationGroup
=
polarizationGroupFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
bool
isExcluded
=
!
(
excl
&
0x1
);
int
atom2
=
tbx
+
j
;
real3
delta
=
make_real3
(
localData
[
atom2
].
posq
.
x
-
data
.
posq
.
x
,
localData
[
atom2
].
posq
.
y
-
data
.
posq
.
y
,
localData
[
atom2
].
posq
.
z
-
data
.
posq
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real3
field1
;
real3
field2
;
computeOneInteraction
(
data
,
localData
[
atom2
],
delta
,
field1
,
field2
);
if
(
!
isExcluded
)
{
float
d
=
computeDScaleFactor
(
covalent
);
data
.
field
+=
d
*
field1
;
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
);
data
.
fieldPolar
+=
p
*
field1
;
}
excl
>>=
1
;
covalent
.
x
>>=
1
;
covalent
.
y
>>=
1
;
polarizationGroup
>>=
1
;
}
}
else
{
// This is an off-diagonal tile.
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
loadAtomData
(
localData
[
localAtomIndex
],
j
,
posq
,
labFrameDipole
,
labFrameQuadrupole
,
dampingAndThole
);
localData
[
localAtomIndex
].
field
=
make_real3
(
0
);
localData
[
localAtomIndex
].
fieldPolar
=
make_real3
(
0
);
#ifdef USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags
[
pos
]
:
0xFFFFFFFF
);
if
(
!
hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
// No interactions in this tile.
}
else
{
// Compute only a subset of the interactions in this tile.
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
bool
isExcluded
=
false
;
int
atom2
=
tbx
+
j
;
int
bufferIndex
=
3
*
threadIdx
.
x
;
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
real3
delta
=
make_real3
(
localData
[
atom2
].
posq
.
x
-
data
.
posq
.
x
,
localData
[
atom2
].
posq
.
y
-
data
.
posq
.
y
,
localData
[
atom2
].
posq
.
z
-
data
.
posq
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
);
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
j
;
COMPUTE_INTERACTION
#ifdef USE_CUTOFF
}
#endif
#ifdef ENABLE_SHUFFLE
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
for
(
int
i
=
16
;
i
>=
1
;
i
/=
2
)
{
dEdR2
.
x
+=
__shfl_xor
(
dEdR2
.
x
,
i
,
32
);
dEdR2
.
y
+=
__shfl_xor
(
dEdR2
.
y
,
i
,
32
);
dEdR2
.
z
+=
__shfl_xor
(
dEdR2
.
z
,
i
,
32
);
}
if
(
tgx
==
0
)
{
localData
[
tbx
+
j
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
j
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
j
].
fz
+=
dEdR2
.
z
;
}
#else
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
tempBuffer
[
bufferIndex
]
=
dEdR2
.
x
;
tempBuffer
[
bufferIndex
+
1
]
=
dEdR2
.
y
;
tempBuffer
[
bufferIndex
+
2
]
=
dEdR2
.
z
;
// Sum the forces on atom2.
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
bufferIndex
]
+=
tempBuffer
[
bufferIndex
+
3
]
+
tempBuffer
[
bufferIndex
+
6
]
+
tempBuffer
[
bufferIndex
+
9
];
tempBuffer
[
bufferIndex
+
1
]
+=
tempBuffer
[
bufferIndex
+
4
]
+
tempBuffer
[
bufferIndex
+
7
]
+
tempBuffer
[
bufferIndex
+
10
];
tempBuffer
[
bufferIndex
+
2
]
+=
tempBuffer
[
bufferIndex
+
5
]
+
tempBuffer
[
bufferIndex
+
8
]
+
tempBuffer
[
bufferIndex
+
11
];
}
if
(
tgx
==
0
)
{
localData
[
tbx
+
j
].
fx
+=
tempBuffer
[
bufferIndex
]
+
tempBuffer
[
bufferIndex
+
12
]
+
tempBuffer
[
bufferIndex
+
24
]
+
tempBuffer
[
bufferIndex
+
36
]
+
tempBuffer
[
bufferIndex
+
48
]
+
tempBuffer
[
bufferIndex
+
60
]
+
tempBuffer
[
bufferIndex
+
72
]
+
tempBuffer
[
bufferIndex
+
84
];
localData
[
tbx
+
j
].
fy
+=
tempBuffer
[
bufferIndex
+
1
]
+
tempBuffer
[
bufferIndex
+
13
]
+
tempBuffer
[
bufferIndex
+
25
]
+
tempBuffer
[
bufferIndex
+
37
]
+
tempBuffer
[
bufferIndex
+
49
]
+
tempBuffer
[
bufferIndex
+
61
]
+
tempBuffer
[
bufferIndex
+
73
]
+
tempBuffer
[
bufferIndex
+
85
];
localData
[
tbx
+
j
].
fz
+=
tempBuffer
[
bufferIndex
+
2
]
+
tempBuffer
[
bufferIndex
+
14
]
+
tempBuffer
[
bufferIndex
+
26
]
+
tempBuffer
[
bufferIndex
+
38
]
+
tempBuffer
[
bufferIndex
+
50
]
+
tempBuffer
[
bufferIndex
+
62
]
+
tempBuffer
[
bufferIndex
+
74
]
+
tempBuffer
[
bufferIndex
+
86
];
}
#endif
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
unsigned
int
excl
=
(
hasExclusions
?
exclusions
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
0xFFFFFFFF
);
uint2
covalent
=
(
hasExclusions
?
covalentFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
make_uint2
(
0
,
0
));
unsigned
int
polarizationGroup
=
(
hasExclusions
?
polarizationGroupFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
0
);
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TILE_SIZE
-
tgx
));
covalent
.
x
=
(
covalent
.
x
>>
tgx
)
|
(
covalent
.
x
<<
(
TILE_SIZE
-
tgx
));
covalent
.
y
=
(
covalent
.
y
>>
tgx
)
|
(
covalent
.
y
<<
(
TILE_SIZE
-
tgx
));
polarizationGroup
=
(
polarizationGroup
>>
tgx
)
|
(
polarizationGroup
<<
(
TILE_SIZE
-
tgx
));
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
bool
isExcluded
=
!
(
excl
&
0x1
);
int
atom2
=
tbx
+
tj
;
real3
delta
=
make_real3
(
localData
[
atom2
].
posq
.
x
-
data
.
posq
.
x
,
localData
[
atom2
].
posq
.
y
-
data
.
posq
.
y
,
localData
[
atom2
].
posq
.
z
-
data
.
posq
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real3
field1
;
real3
field2
;
computeOneInteraction
(
data
,
localData
[
atom2
],
delta
,
field1
,
field2
);
if
(
!
isExcluded
)
{
float
d
=
computeDScaleFactor
(
covalent
);
data
.
field
+=
d
*
field1
;
localData
[
atom2
].
field
+=
d
*
field2
;
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
);
data
.
fieldPolar
+=
p
*
field1
;
localData
[
atom2
].
fieldPolar
+=
p
*
field2
;
}
excl
>>=
1
;
covalent
.
x
>>=
1
;
covalent
.
y
>>=
1
;
polarizationGroup
>>=
1
;
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
}
}
}
// Write results.
if
(
pos
<
end
)
{
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
fieldBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
field
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
field
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
field
.
z
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
fieldPolar
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
fieldPolar
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
fieldPolar
.
z
*
0xFFFFFFFF
)));
}
if
(
pos
<
end
&&
x
!=
y
)
{
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
fieldBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
field
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
field
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
field
.
z
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fieldPolar
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fieldPolar
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fieldPolar
.
z
*
0xFFFFFFFF
)));
}
pos
++
;
}
while
(
pos
<
end
);
}
plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
0 → 100644
View file @
d0426ba9
extern
"C"
__global__
void
computeLabFrameMoments
(
real4
*
__restrict__
posq
,
int4
*
__restrict__
multipoleParticles
,
float
*
__restrict__
molecularDipoles
,
float
*
__restrict__
molecularQuadrupoles
,
real
*
__restrict__
labFrameDipoles
,
real
*
__restrict__
labFrameQuadrupoles
)
{
// get coordinates of this atom and the z & x axis atoms
// compute the vector between the atoms and 1/sqrt(d2), d2 is distance between
// this atom and the axis atom
// this atom is referred to as the k-atom in notes below
// code common to ZThenX and Bisector
for
(
int
particleIndex
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
particleIndex
<
NUM_ATOMS
;
particleIndex
+=
gridDim
.
x
*
blockDim
.
x
)
{
int4
particles
=
multipoleParticles
[
particleIndex
];
if
(
particles
.
x
>=
0
&&
particles
.
z
>=
0
)
{
real4
thisParticlePos
=
posq
[
particleIndex
];
real4
posZ
=
posq
[
particles
.
z
];
real3
vectorZ
=
make_real3
(
posZ
.
x
-
thisParticlePos
.
x
,
posZ
.
y
-
thisParticlePos
.
y
,
posZ
.
z
-
thisParticlePos
.
z
);
real4
posX
=
posq
[
particles
.
x
];
real3
vectorX
=
make_real3
(
posX
.
x
-
thisParticlePos
.
x
,
posX
.
y
-
thisParticlePos
.
y
,
posX
.
z
-
thisParticlePos
.
z
);
int
axisType
=
particles
.
w
;
/*
z-only
(1) norm z
(2) select random x
(3) x = x - (x.z)z
(4) norm x
z-then-x
(1) norm z
(2) norm x (not needed)
(3) x = x - (x.z)z
(4) norm x
bisector
(1) norm z
(2) norm x
(3) z = x + z
(4) norm z
(5) x = x - (x.z)z
(6) norm x
z-bisect
(1) norm z
(2) norm x
(3) norm y
(3) x = x + y
(4) norm x
(5) x = x - (x.z)z
(6) norm x
3-fold
(1) norm z
(2) norm x
(3) norm y
(4) z = x + y + z
(5) norm z
(6) x = x - (x.z)z
(7) norm x
*/
// branch based on axis type
vectorZ
=
normalize
(
vectorZ
);
if
(
axisType
==
1
)
{
// bisector
vectorX
=
normalize
(
vectorX
);
vectorZ
+=
vectorX
;
vectorZ
=
normalize
(
vectorZ
);
}
else
if
(
axisType
==
2
||
axisType
==
3
)
{
// z-bisect
if
(
particles
.
y
>=
0
&&
particles
.
y
<
NUM_ATOMS
)
{
real4
posY
=
posq
[
particles
.
y
];
real3
vectorY
=
make_real3
(
posY
.
x
-
thisParticlePos
.
x
,
posY
.
y
-
thisParticlePos
.
y
,
posY
.
z
-
thisParticlePos
.
z
);
vectorY
=
normalize
(
vectorY
);
vectorX
=
normalize
(
vectorX
);
if
(
axisType
==
2
)
{
vectorX
+=
vectorY
;
vectorX
=
normalize
(
vectorX
);
}
else
{
// 3-fold
vectorZ
+=
vectorX
+
vectorY
;
vectorZ
=
normalize
(
vectorZ
);
}
}
}
else
if
(
axisType
>=
4
)
vectorX
=
make_real3
((
real
)
0.1
f
);
// x = x - (x.z)z
vectorX
-=
dot
(
vectorZ
,
vectorX
)
*
vectorZ
;
vectorX
=
normalize
(
vectorX
);
real3
vectorY
=
cross
(
vectorZ
,
vectorX
);
// use identity rotation matrix for unrecognized axis types
if
(
axisType
<
0
||
axisType
>
4
)
{
vectorX
.
x
=
1
;
vectorX
.
y
=
0
;
vectorX
.
z
=
0
;
vectorY
.
x
=
0
;
vectorY
.
y
=
1
;
vectorY
.
z
=
0
;
vectorZ
.
x
=
0
;
vectorZ
.
y
=
0
;
vectorZ
.
z
=
1
;
}
// Check the chirality and see whether it needs to be reversed
bool
reverse
=
false
;
if
(
axisType
!=
0
&&
particles
.
x
>=
0
&&
particles
.
y
>=
0
&&
particles
.
z
>=
0
)
{
real4
posY
=
posq
[
particles
.
y
];
real
delta
[
4
][
3
];
delta
[
0
][
0
]
=
thisParticlePos
.
x
-
posY
.
x
;
delta
[
0
][
1
]
=
thisParticlePos
.
y
-
posY
.
y
;
delta
[
0
][
2
]
=
thisParticlePos
.
z
-
posY
.
z
;
delta
[
1
][
0
]
=
posZ
.
x
-
posY
.
x
;
delta
[
1
][
1
]
=
posZ
.
y
-
posY
.
y
;
delta
[
1
][
2
]
=
posZ
.
z
-
posY
.
z
;
delta
[
2
][
0
]
=
posX
.
x
-
posY
.
x
;
delta
[
2
][
1
]
=
posX
.
y
-
posY
.
y
;
delta
[
2
][
2
]
=
posX
.
z
-
posY
.
z
;
delta
[
3
][
0
]
=
delta
[
1
][
1
]
*
delta
[
2
][
2
]
-
delta
[
1
][
2
]
*
delta
[
2
][
1
];
delta
[
3
][
1
]
=
delta
[
2
][
1
]
*
delta
[
0
][
2
]
-
delta
[
2
][
2
]
*
delta
[
0
][
1
];
delta
[
3
][
2
]
=
delta
[
0
][
1
]
*
delta
[
1
][
2
]
-
delta
[
0
][
2
]
*
delta
[
1
][
1
];
real
volume
=
delta
[
3
][
0
]
*
delta
[
0
][
0
]
+
delta
[
3
][
1
]
*
delta
[
1
][
0
]
+
delta
[
3
][
2
]
*
delta
[
2
][
0
];
reverse
=
(
volume
<
0
);
}
// Transform the dipole
unsigned
int
offset
=
3
*
particleIndex
;
real
molDipole
[
3
];
molDipole
[
0
]
=
molecularDipoles
[
offset
];
molDipole
[
1
]
=
molecularDipoles
[
offset
+
1
];
molDipole
[
2
]
=
molecularDipoles
[
offset
+
2
];
if
(
reverse
)
molDipole
[
1
]
*=
-
1
;
labFrameDipoles
[
offset
]
=
molDipole
[
0
]
*
vectorX
.
x
+
molDipole
[
1
]
*
vectorY
.
x
+
molDipole
[
2
]
*
vectorZ
.
x
;
labFrameDipoles
[
offset
+
1
]
=
molDipole
[
0
]
*
vectorX
.
y
+
molDipole
[
1
]
*
vectorY
.
y
+
molDipole
[
2
]
*
vectorZ
.
y
;
labFrameDipoles
[
offset
+
2
]
=
molDipole
[
0
]
*
vectorX
.
z
+
molDipole
[
1
]
*
vectorY
.
z
+
molDipole
[
2
]
*
vectorZ
.
z
;
// ---------------------------------------------------------------------------------------
// Transform the quadrupole
real
mPole
[
3
][
3
];
offset
=
9
*
particleIndex
;
mPole
[
0
][
0
]
=
molecularQuadrupoles
[
offset
];
mPole
[
0
][
1
]
=
molecularQuadrupoles
[
offset
+
1
];
mPole
[
0
][
2
]
=
molecularQuadrupoles
[
offset
+
2
];
mPole
[
1
][
0
]
=
molecularQuadrupoles
[
offset
+
3
];
mPole
[
1
][
1
]
=
molecularQuadrupoles
[
offset
+
4
];
mPole
[
1
][
2
]
=
molecularQuadrupoles
[
offset
+
5
];
mPole
[
2
][
0
]
=
molecularQuadrupoles
[
offset
+
6
];
mPole
[
2
][
1
]
=
molecularQuadrupoles
[
offset
+
7
];
mPole
[
2
][
2
]
=
molecularQuadrupoles
[
offset
+
8
];
if
(
reverse
)
{
mPole
[
0
][
1
]
*=
-
1
;
mPole
[
1
][
0
]
*=
-
1
;
mPole
[
1
][
2
]
*=
-
1
;
mPole
[
2
][
1
]
*=
-
1
;
}
labFrameQuadrupoles
[
offset
+
8
]
=
vectorX
.
z
*
(
vectorX
.
z
*
mPole
[
0
][
0
]
+
vectorY
.
z
*
mPole
[
0
][
1
]
+
vectorZ
.
z
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
8
]
+=
vectorY
.
z
*
(
vectorX
.
z
*
mPole
[
1
][
0
]
+
vectorY
.
z
*
mPole
[
1
][
1
]
+
vectorZ
.
z
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
8
]
+=
vectorZ
.
z
*
(
vectorX
.
z
*
mPole
[
2
][
0
]
+
vectorY
.
z
*
mPole
[
2
][
1
]
+
vectorZ
.
z
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
4
]
=
vectorX
.
y
*
(
vectorX
.
y
*
mPole
[
0
][
0
]
+
vectorY
.
y
*
mPole
[
0
][
1
]
+
vectorZ
.
y
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
4
]
+=
vectorY
.
y
*
(
vectorX
.
y
*
mPole
[
1
][
0
]
+
vectorY
.
y
*
mPole
[
1
][
1
]
+
vectorZ
.
y
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
4
]
+=
vectorZ
.
y
*
(
vectorX
.
y
*
mPole
[
2
][
0
]
+
vectorY
.
y
*
mPole
[
2
][
1
]
+
vectorZ
.
y
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
5
]
=
vectorX
.
y
*
(
vectorX
.
z
*
mPole
[
0
][
0
]
+
vectorY
.
z
*
mPole
[
0
][
1
]
+
vectorZ
.
z
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
5
]
+=
vectorY
.
y
*
(
vectorX
.
z
*
mPole
[
1
][
0
]
+
vectorY
.
z
*
mPole
[
1
][
1
]
+
vectorZ
.
z
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
5
]
+=
vectorZ
.
y
*
(
vectorX
.
z
*
mPole
[
2
][
0
]
+
vectorY
.
z
*
mPole
[
2
][
1
]
+
vectorZ
.
z
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
]
=
vectorX
.
x
*
(
vectorX
.
x
*
mPole
[
0
][
0
]
+
vectorY
.
x
*
mPole
[
0
][
1
]
+
vectorZ
.
x
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
]
+=
vectorY
.
x
*
(
vectorX
.
x
*
mPole
[
1
][
0
]
+
vectorY
.
x
*
mPole
[
1
][
1
]
+
vectorZ
.
x
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
]
+=
vectorZ
.
x
*
(
vectorX
.
x
*
mPole
[
2
][
0
]
+
vectorY
.
x
*
mPole
[
2
][
1
]
+
vectorZ
.
x
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
1
]
=
vectorX
.
x
*
(
vectorX
.
y
*
mPole
[
0
][
0
]
+
vectorY
.
y
*
mPole
[
0
][
1
]
+
vectorZ
.
y
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
1
]
+=
vectorY
.
x
*
(
vectorX
.
y
*
mPole
[
1
][
0
]
+
vectorY
.
y
*
mPole
[
1
][
1
]
+
vectorZ
.
y
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
1
]
+=
vectorZ
.
x
*
(
vectorX
.
y
*
mPole
[
2
][
0
]
+
vectorY
.
y
*
mPole
[
2
][
1
]
+
vectorZ
.
y
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
2
]
=
vectorX
.
x
*
(
vectorX
.
z
*
mPole
[
0
][
0
]
+
vectorY
.
z
*
mPole
[
0
][
1
]
+
vectorZ
.
z
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
2
]
+=
vectorY
.
x
*
(
vectorX
.
z
*
mPole
[
1
][
0
]
+
vectorY
.
z
*
mPole
[
1
][
1
]
+
vectorZ
.
z
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
2
]
+=
vectorZ
.
x
*
(
vectorX
.
z
*
mPole
[
2
][
0
]
+
vectorY
.
z
*
mPole
[
2
][
1
]
+
vectorZ
.
z
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
3
]
=
labFrameQuadrupoles
[
offset
+
1
];
labFrameQuadrupoles
[
offset
+
6
]
=
labFrameQuadrupoles
[
offset
+
2
];
labFrameQuadrupoles
[
offset
+
7
]
=
labFrameQuadrupoles
[
offset
+
5
];
}
}
}
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaPME.cpp
0 → 100644
View file @
d0426ba9
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2010 Stanford University and the Authors. *
* Authors: Mark Friedrichs *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of AmoebaMultipoleForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "AmoebaTinkerParameterFile.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/AmoebaMultipoleForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
void
testPMEWater
()
{
Platform
&
platform
=
Platform
::
getPlatformByName
(
"CUDA"
);
System
system
;
system
.
addParticle
(
16
);
system
.
addParticle
(
1
);
system
.
addParticle
(
1
);
VerletIntegrator
integrator
(
0.01
);
AmoebaMultipoleForce
*
mp
=
new
AmoebaMultipoleForce
();
mp
->
setNonbondedMethod
(
AmoebaMultipoleForce
::
PME
);
vector
<
double
>
dipole
(
3
,
0.0
);
dipole
[
2
]
=
7.556121361e-2
;
vector
<
double
>
quadrupole
(
9
,
0.0
);
quadrupole
[
0
]
=
3.540307211e-2
;
quadrupole
[
4
]
=
-
3.902570771e-2
;
quadrupole
[
8
]
=
3.622635596e-3
;
double
damp
=
9.707801995e-01
*
sqrt
(
0.1
);
double
polarity
=
0.837
*
0.001
;
mp
->
addParticle
(
-
0.51966
,
dipole
,
quadrupole
,
1
,
1
,
2
,
-
1
,
0.39
,
damp
,
polarity
);
dipole
[
0
]
=
-
2.042094848e-2
;
dipole
[
2
]
=
-
3.078753000e-2
;
quadrupole
[
0
]
=
-
3.428482490e-3
;
quadrupole
[
2
]
=
-
1.894859639e-4
;
quadrupole
[
4
]
=
-
1.002408752e-2
;
quadrupole
[
6
]
=
-
1.894859639e-4
;
quadrupole
[
8
]
=
1.345257001e-2
;
damp
=
8.897068742e-01
*
sqrt
(
0.1
);
polarity
=
0.496
*
0.001
;
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
2
,
-
1
,
0.39
,
damp
,
polarity
);
mp
->
addParticle
(
0.25983
,
dipole
,
quadrupole
,
0
,
0
,
1
,
-
1
,
0.39
,
damp
,
polarity
);
mp
->
setCutoffDistance
(
1.0
);
std
::
vector
<
int
>
intVector
;
intVector
.
push_back
(
0
);
intVector
.
push_back
(
1
);
intVector
.
push_back
(
2
);
mp
->
setCovalentMap
(
0
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
intVector
);
mp
->
setCovalentMap
(
1
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
intVector
);
mp
->
setCovalentMap
(
2
,
AmoebaMultipoleForce
::
PolarizationCovalent11
,
intVector
);
intVector
.
resize
(
0
);
intVector
.
push_back
(
1
);
intVector
.
push_back
(
2
);
mp
->
setCovalentMap
(
0
,
AmoebaMultipoleForce
::
Covalent12
,
intVector
);
intVector
.
resize
(
0
);
intVector
.
push_back
(
0
);
intVector
.
push_back
(
2
);
mp
->
setCovalentMap
(
1
,
AmoebaMultipoleForce
::
Covalent12
,
intVector
);
intVector
.
resize
(
0
);
intVector
.
push_back
(
0
);
intVector
.
push_back
(
1
);
mp
->
setCovalentMap
(
2
,
AmoebaMultipoleForce
::
Covalent12
,
intVector
);
mp
->
setEwaldErrorTolerance
(
TOL
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
2
,
0
,
0
),
Vec3
(
0
,
2
,
0
),
Vec3
(
0
,
0
,
2
));
system
.
addForce
(
mp
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
const
double
angle
=
109.47
*
M_PI
/
180
;
const
double
dOH
=
0.1
;
positions
[
0
]
=
Vec3
();
positions
[
1
]
=
Vec3
(
dOH
,
0
,
0
);
positions
[
2
]
=
Vec3
(
dOH
*
std
::
cos
(
angle
),
dOH
*
std
::
sin
(
angle
),
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
stderr
,
"PME forces
\n
"
);
for
(
unsigned
int
ii
=
0
;
ii
<
forces
.
size
();
ii
++
){
(
void
)
fprintf
(
stderr
,
"%6u [%14.7e %14.7e %14.7e]
\n
"
,
ii
,
forces
[
ii
][
0
],
forces
[
ii
][
1
],
forces
[
ii
][
2
]
);
}
(
void
)
fflush
(
stderr
);
#endif
}
int
main
()
{
try
{
registerAmoebaCudaKernelFactories
();
testPMEWater
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
std
::
cout
<<
"Done"
<<
std
::
endl
;
return
0
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment