Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
e5b7f0b3
Commit
e5b7f0b3
authored
Aug 02, 2012
by
Peter Eastman
Browse files
Continuing to convert AmoebaMultipoleForce
parent
155fe172
Changes
7
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
1673 additions
and
29 deletions
+1673
-29
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
+10
-2
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
+2
-1
plugins/amoeba/platforms/cuda2/src/kernels/electrostaticPairForce.cu
...eba/platforms/cuda2/src/kernels/electrostaticPairForce.cu
+2
-2
plugins/amoeba/platforms/cuda2/src/kernels/multipoleElectrostatics.cu
...ba/platforms/cuda2/src/kernels/multipoleElectrostatics.cu
+11
-11
plugins/amoeba/platforms/cuda2/src/kernels/multipoleFixedField.cu
...amoeba/platforms/cuda2/src/kernels/multipoleFixedField.cu
+1
-1
plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
+229
-12
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
...ba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
+1418
-0
No files found.
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
View file @
e5b7f0b3
...
...
@@ -866,7 +866,7 @@ private:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
dampingAndThole
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
inducedDipolePolar
(
NULL
),
currentEpsilon
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
)
{
}
...
...
@@ -887,6 +887,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
field
;
if
(
fieldPolar
!=
NULL
)
delete
fieldPolar
;
if
(
torque
!=
NULL
)
delete
torque
;
if
(
dampingAndThole
!=
NULL
)
delete
dampingAndThole
;
if
(
inducedDipole
!=
NULL
)
...
...
@@ -966,10 +968,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
labFrameQuadrupoles
=
new
CudaArray
(
cu
,
5
*
paddedNumAtoms
,
elementSize
,
"labFrameQuadrupoles"
);
field
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"field"
);
fieldPolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"fieldPolar"
);
torque
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"torque"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
torque
);
// Record which atoms should be flagged as exclusions based on covalent groups, and determine
// the values for the covalent group flags.
...
...
@@ -1025,6 +1029,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoles
,
defines
);
computeMomentsKernel
=
cu
.
getKernel
(
module
,
"computeLabFrameMoments"
);
recordInducedDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipoles"
);
mapTorqueKernel
=
cu
.
getKernel
(
module
,
"mapTorqueToForce"
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleFixedField
,
defines
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
stringstream
electrostaticsSource
;
...
...
@@ -1437,12 +1442,15 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Compute electrostatic force.
void
*
electrostaticsArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
void
*
electrostaticsArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionIndices
().
getDevicePointer
(),
&
nb
.
getExclusionRowIndices
().
getDevicePointer
(),
&
covalentFlags
->
getDevicePointer
(),
&
polarizationGroupFlags
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
void
*
mapTorqueArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
multipoleParticles
->
getDevicePointer
()};
cu
.
executeKernel
(
mapTorqueKernel
,
mapTorqueArgs
,
cu
.
getNumAtoms
());
}
return
0.0
;
}
...
...
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
View file @
e5b7f0b3
...
...
@@ -388,6 +388,7 @@ private:
CudaArray
*
labFrameQuadrupoles
;
CudaArray
*
field
;
CudaArray
*
fieldPolar
;
CudaArray
*
torque
;
CudaArray
*
dampingAndThole
;
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipolePolar
;
...
...
@@ -412,7 +413,7 @@ private:
CudaArray
*
pmeAtomGridIndex
;
CudaSort
*
sort
;
cufftHandle
fft
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
electrostaticsKernel
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
};
/**
...
...
plugins/amoeba/platforms/cuda2/src/kernels/electrostaticPairForce.cu
View file @
e5b7f0b3
...
...
@@ -92,12 +92,12 @@ __device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom
real
dsc7
=
rr7
*
scale7
*
dScale
;
real
psc7
=
rr7
*
scale7
*
pScale
;
real
atom2quadrupoleZZ
=
1
-
atom2
.
quadrupoleXX
-
atom2
.
quadrupoleYY
;
real
atom2quadrupoleZZ
=
-
(
atom2
.
quadrupoleXX
+
atom2
.
quadrupoleYY
)
;
real
qJr_0
=
atom2
.
quadrupoleXX
*
xr
+
atom2
.
quadrupoleXY
*
yr
+
atom2
.
quadrupoleXZ
*
zr
;
real
qJr_1
=
atom2
.
quadrupoleXY
*
xr
+
atom2
.
quadrupoleYY
*
yr
+
atom2
.
quadrupoleYZ
*
zr
;
real
qJr_2
=
atom2
.
quadrupoleXZ
*
xr
+
atom2
.
quadrupoleYZ
*
yr
+
atom2quadrupoleZZ
*
zr
;
real
atom1quadrupoleZZ
=
1
-
atom1
.
quadrupoleXX
-
atom1
.
quadrupoleYY
;
real
atom1quadrupoleZZ
=
-
(
atom1
.
quadrupoleXX
+
atom1
.
quadrupoleYY
)
;
real
qIr_0
=
atom1
.
quadrupoleXX
*
xr
+
atom1
.
quadrupoleXY
*
yr
+
atom1
.
quadrupoleXZ
*
zr
;
real
qIr_1
=
atom1
.
quadrupoleXY
*
xr
+
atom1
.
quadrupoleYY
*
yr
+
atom1
.
quadrupoleYZ
*
zr
;
real
qIr_2
=
atom1
.
quadrupoleXZ
*
xr
+
atom1
.
quadrupoleYZ
*
yr
+
atom1quadrupoleZZ
*
zr
;
...
...
plugins/amoeba/platforms/cuda2/src/kernels/multipoleElectrostatics.cu
View file @
e5b7f0b3
...
...
@@ -56,7 +56,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
* Compute electrostatic interactions.
*/
extern
"C"
__global__
void
computeElectrostatics
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
unsigned
int
*
__restrict__
exclusionIndices
,
const
unsigned
int
*
__restrict__
exclusionRowIndices
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
...
...
@@ -183,9 +183,9 @@ extern "C" __global__ void computeElectrostatics(
polarizationGroup
>>=
1
;
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
atomicAdd
(
&
f
or
c
eBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
f
or
c
eBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
f
or
c
eBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0xFFFFFFFF
)));
atomicAdd
(
&
t
or
qu
eBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
t
or
qu
eBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
t
or
qu
eBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0xFFFFFFFF
)));
}
else
{
// This is an off-diagonal tile.
...
...
@@ -338,18 +338,18 @@ extern "C" __global__ void computeElectrostatics(
localData
[
threadIdx
.
x
].
force
*=
ENERGY_SCALE_FACTOR
;
if
(
pos
<
end
)
{
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
f
or
c
eBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
f
or
c
eBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
f
or
c
eBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0xFFFFFFFF
)));
atomicAdd
(
&
t
or
qu
eBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
t
or
qu
eBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
t
or
qu
eBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0xFFFFFFFF
)));
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
f
or
c
eBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
f
or
c
eBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
f
or
c
eBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
z
*
0xFFFFFFFF
)));
atomicAdd
(
&
t
or
qu
eBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
t
or
qu
eBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
t
or
qu
eBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
z
*
0xFFFFFFFF
)));
}
}
}
}
pos
++
;
}
while
(
pos
<
end
);
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
*
ENERGY_SCALE_FACTOR
;
}
plugins/amoeba/platforms/cuda2/src/kernels/multipoleFixedField.cu
View file @
e5b7f0b3
...
...
@@ -19,7 +19,7 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data
.
quadrupoleXZ
=
labFrameQuadrupole
[
atom
*
5
+
2
];
data
.
quadrupoleYY
=
labFrameQuadrupole
[
atom
*
5
+
3
];
data
.
quadrupoleYZ
=
labFrameQuadrupole
[
atom
*
5
+
4
];
data
.
quadrupoleZZ
=
1
-
data
.
quadrupoleXX
-
data
.
quadrupoleYY
;
data
.
quadrupoleZZ
=
-
(
data
.
quadrupoleXX
+
data
.
quadrupoleYY
)
;
float2
temp
=
dampingAndThole
[
atom
];
data
.
damp
=
temp
.
x
;
data
.
thole
=
temp
.
y
;
...
...
plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
View file @
e5b7f0b3
...
...
@@ -170,27 +170,27 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
real
mPoleXZ
=
molecularQuadrupoles
[
offset
+
2
];
real
mPoleYY
=
molecularQuadrupoles
[
offset
+
3
];
real
mPoleYZ
=
molecularQuadrupoles
[
offset
+
4
];
real
mPoleZZ
=
1
-
mPoleXX
-
mPoleYY
;
real
mPoleZZ
=
-
(
mPoleXX
+
mPoleYY
)
;
if
(
reverse
)
{
mPoleXY
*=
-
1
;
mPoleYZ
*=
-
1
;
}
labFrameQuadrupoles
[
offset
]
=
vectorX
.
x
*
(
vectorX
.
x
*
mPoleXX
+
vectorY
.
x
*
mPoleXY
+
vectorZ
.
x
*
mPoleXZ
)
;
+
vectorY
.
x
*
(
vectorX
.
x
*
mPoleXY
+
vectorY
.
x
*
mPoleYY
+
vectorZ
.
x
*
mPoleYZ
)
;
labFrameQuadrupoles
[
offset
]
=
vectorX
.
x
*
(
vectorX
.
x
*
mPoleXX
+
vectorY
.
x
*
mPoleXY
+
vectorZ
.
x
*
mPoleXZ
)
+
vectorY
.
x
*
(
vectorX
.
x
*
mPoleXY
+
vectorY
.
x
*
mPoleYY
+
vectorZ
.
x
*
mPoleYZ
)
+
vectorZ
.
x
*
(
vectorX
.
x
*
mPoleXZ
+
vectorY
.
x
*
mPoleYZ
+
vectorZ
.
x
*
mPoleZZ
);
labFrameQuadrupoles
[
offset
+
1
]
=
vectorX
.
x
*
(
vectorX
.
y
*
mPoleXX
+
vectorY
.
y
*
mPoleXY
+
vectorZ
.
y
*
mPoleXZ
)
;
+
vectorY
.
x
*
(
vectorX
.
y
*
mPoleXY
+
vectorY
.
y
*
mPoleYY
+
vectorZ
.
y
*
mPoleYZ
)
;
labFrameQuadrupoles
[
offset
+
1
]
=
vectorX
.
x
*
(
vectorX
.
y
*
mPoleXX
+
vectorY
.
y
*
mPoleXY
+
vectorZ
.
y
*
mPoleXZ
)
+
vectorY
.
x
*
(
vectorX
.
y
*
mPoleXY
+
vectorY
.
y
*
mPoleYY
+
vectorZ
.
y
*
mPoleYZ
)
+
vectorZ
.
x
*
(
vectorX
.
y
*
mPoleXZ
+
vectorY
.
y
*
mPoleYZ
+
vectorZ
.
y
*
mPoleZZ
);
labFrameQuadrupoles
[
offset
+
2
]
=
vectorX
.
x
*
(
vectorX
.
z
*
mPoleXX
+
vectorY
.
z
*
mPoleXY
+
vectorZ
.
z
*
mPoleXZ
)
;
+
vectorY
.
x
*
(
vectorX
.
z
*
mPoleXY
+
vectorY
.
z
*
mPoleYY
+
vectorZ
.
z
*
mPoleYZ
)
;
labFrameQuadrupoles
[
offset
+
2
]
=
vectorX
.
x
*
(
vectorX
.
z
*
mPoleXX
+
vectorY
.
z
*
mPoleXY
+
vectorZ
.
z
*
mPoleXZ
)
+
vectorY
.
x
*
(
vectorX
.
z
*
mPoleXY
+
vectorY
.
z
*
mPoleYY
+
vectorZ
.
z
*
mPoleYZ
)
+
vectorZ
.
x
*
(
vectorX
.
z
*
mPoleXZ
+
vectorY
.
z
*
mPoleYZ
+
vectorZ
.
z
*
mPoleZZ
);
labFrameQuadrupoles
[
offset
+
3
]
=
vectorX
.
y
*
(
vectorX
.
y
*
mPoleXX
+
vectorY
.
y
*
mPoleXY
+
vectorZ
.
y
*
mPoleXZ
)
;
+
vectorY
.
y
*
(
vectorX
.
y
*
mPoleXY
+
vectorY
.
y
*
mPoleYY
+
vectorZ
.
y
*
mPoleYZ
)
;
labFrameQuadrupoles
[
offset
+
3
]
=
vectorX
.
y
*
(
vectorX
.
y
*
mPoleXX
+
vectorY
.
y
*
mPoleXY
+
vectorZ
.
y
*
mPoleXZ
)
+
vectorY
.
y
*
(
vectorX
.
y
*
mPoleXY
+
vectorY
.
y
*
mPoleYY
+
vectorZ
.
y
*
mPoleYZ
)
+
vectorZ
.
y
*
(
vectorX
.
y
*
mPoleXZ
+
vectorY
.
y
*
mPoleYZ
+
vectorZ
.
y
*
mPoleZZ
);
labFrameQuadrupoles
[
offset
+
4
]
=
vectorX
.
y
*
(
vectorX
.
z
*
mPoleXX
+
vectorY
.
z
*
mPoleXY
+
vectorZ
.
z
*
mPoleXZ
)
;
+
vectorY
.
y
*
(
vectorX
.
z
*
mPoleXY
+
vectorY
.
z
*
mPoleYY
+
vectorZ
.
z
*
mPoleYZ
)
;
labFrameQuadrupoles
[
offset
+
4
]
=
vectorX
.
y
*
(
vectorX
.
z
*
mPoleXX
+
vectorY
.
z
*
mPoleXY
+
vectorZ
.
z
*
mPoleXZ
)
+
vectorY
.
y
*
(
vectorX
.
z
*
mPoleXY
+
vectorY
.
z
*
mPoleYY
+
vectorZ
.
z
*
mPoleYZ
)
+
vectorZ
.
y
*
(
vectorX
.
z
*
mPoleXZ
+
vectorY
.
z
*
mPoleYZ
+
vectorZ
.
z
*
mPoleZZ
);
}
}
...
...
@@ -208,3 +208,220 @@ extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fi
inducedDipolePolar
[
3
*
atom
+
2
]
=
scale
*
fieldPolarBuffers
[
atom
+
PADDED_NUM_ATOMS
*
2
];
}
}
/**
* Convert a real4 to a real3 by removing its last element.
*/
inline
__device__
real3
trim
(
real4
v
)
{
return
make_real3
(
v
.
x
,
v
.
y
,
v
.
z
);
}
/**
* Normalize a vector and return what its magnitude was.
*/
inline
__device__
real
normVector
(
real3
&
v
)
{
real
n
=
SQRT
(
dot
(
v
,
v
));
v
*=
(
n
>
0
?
RECIP
(
n
)
:
0
);
return
n
;
}
extern
"C"
__global__
void
mapTorqueToForce
(
unsigned
long
long
*
__restrict__
forceBuffers
,
const
long
long
*
__restrict__
torqueBuffers
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
multipoleParticles
)
{
const
int
U
=
0
;
const
int
V
=
1
;
const
int
W
=
2
;
const
int
R
=
3
;
const
int
S
=
4
;
const
int
UV
=
5
;
const
int
UW
=
6
;
const
int
VW
=
7
;
const
int
UR
=
8
;
const
int
US
=
9
;
const
int
VS
=
10
;
const
int
WS
=
11
;
const
int
LastVectorIndex
=
12
;
const
int
X
=
0
;
const
int
Y
=
1
;
const
int
Z
=
2
;
const
int
I
=
3
;
const
real
torqueScale
=
RECIP
((
double
)
0xFFFFFFFF
);
real3
forces
[
4
];
real
norms
[
LastVectorIndex
];
real3
vector
[
LastVectorIndex
];
real
angles
[
LastVectorIndex
][
2
];
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
gridDim
.
x
*
blockDim
.
x
)
{
int4
particles
=
multipoleParticles
[
atom
];
int
axisAtom
=
particles
.
z
;
int
axisType
=
particles
.
w
;
// NoAxisType
if
(
axisType
<
5
&&
particles
.
z
>=
0
)
{
real3
atomPos
=
trim
(
posq
[
atom
]);
vector
[
U
]
=
atomPos
-
trim
(
posq
[
axisAtom
]);
norms
[
U
]
=
normVector
(
vector
[
U
]);
if
(
axisType
!=
4
&&
particles
.
x
>=
0
)
vector
[
V
]
=
atomPos
-
trim
(
posq
[
particles
.
x
]);
else
vector
[
V
]
=
make_real3
(
0.1
f
);
norms
[
V
]
=
normVector
(
vector
[
V
]);
// W = UxV
if
(
axisType
<
2
||
axisType
>
3
)
vector
[
W
]
=
cross
(
vector
[
U
],
vector
[
V
]);
else
vector
[
W
]
=
atomPos
-
trim
(
posq
[
particles
.
y
]);
norms
[
W
]
=
normVector
(
vector
[
W
]);
vector
[
UV
]
=
cross
(
vector
[
V
],
vector
[
U
]);
vector
[
UW
]
=
cross
(
vector
[
W
],
vector
[
U
]);
vector
[
VW
]
=
cross
(
vector
[
W
],
vector
[
V
]);
norms
[
UV
]
=
normVector
(
vector
[
UV
]);
norms
[
UW
]
=
normVector
(
vector
[
UW
]);
norms
[
VW
]
=
normVector
(
vector
[
VW
]);
angles
[
UV
][
0
]
=
dot
(
vector
[
U
],
vector
[
V
]);
angles
[
UV
][
1
]
=
SQRT
(
1
-
angles
[
UV
][
0
]
*
angles
[
UV
][
0
]);
angles
[
UW
][
0
]
=
dot
(
vector
[
U
],
vector
[
W
]);
angles
[
UW
][
1
]
=
SQRT
(
1
-
angles
[
UW
][
0
]
*
angles
[
UW
][
0
]);
angles
[
VW
][
0
]
=
dot
(
vector
[
V
],
vector
[
W
]);
angles
[
VW
][
1
]
=
SQRT
(
1
-
angles
[
VW
][
0
]
*
angles
[
VW
][
0
]);
real
dphi
[
3
];
real3
torque
=
make_real3
(
torqueScale
*
torqueBuffers
[
atom
],
torqueScale
*
torqueBuffers
[
atom
+
PADDED_NUM_ATOMS
],
torqueScale
*
torqueBuffers
[
atom
+
PADDED_NUM_ATOMS
*
2
]);
dphi
[
U
]
=
-
dot
(
vector
[
U
],
torque
);
dphi
[
V
]
=
-
dot
(
vector
[
V
],
torque
);
dphi
[
W
]
=
-
dot
(
vector
[
W
],
torque
);
// z-then-x and bisector
if
(
axisType
==
0
||
axisType
==
1
)
{
real
factor1
=
dphi
[
V
]
/
(
norms
[
U
]
*
angles
[
UV
][
1
]);
real
factor2
=
dphi
[
W
]
/
(
norms
[
U
]);
real
factor3
=
-
dphi
[
U
]
/
(
norms
[
V
]
*
angles
[
UV
][
1
]);
real
factor4
=
0
;
if
(
axisType
==
1
)
{
factor2
*=
0.5
f
;
factor4
=
0.5
f
*
dphi
[
W
]
/
(
norms
[
V
]);
}
forces
[
Z
]
=
vector
[
UV
]
*
factor1
+
factor2
*
vector
[
UW
];
forces
[
X
]
=
vector
[
UV
]
*
factor3
+
factor4
*
vector
[
VW
];
forces
[
I
]
=
-
(
forces
[
X
]
+
forces
[
Z
]);
forces
[
Y
]
=
make_real3
(
0
);
}
else
if
(
axisType
==
2
)
{
// z-bisect
vector
[
R
]
=
vector
[
V
]
+
vector
[
W
];
vector
[
S
]
=
cross
(
vector
[
U
],
vector
[
R
]);
norms
[
R
]
=
normVector
(
vector
[
R
]);
norms
[
S
]
=
normVector
(
vector
[
S
]);
vector
[
UR
]
=
cross
(
vector
[
R
],
vector
[
U
]);
vector
[
US
]
=
cross
(
vector
[
S
],
vector
[
U
]);
vector
[
VS
]
=
cross
(
vector
[
S
],
vector
[
V
]);
vector
[
WS
]
=
cross
(
vector
[
S
],
vector
[
W
]);
norms
[
UR
]
=
normVector
(
vector
[
UR
]);
norms
[
US
]
=
normVector
(
vector
[
US
]);
norms
[
VS
]
=
normVector
(
vector
[
VS
]);
norms
[
WS
]
=
normVector
(
vector
[
WS
]);
angles
[
UR
][
0
]
=
dot
(
vector
[
U
],
vector
[
R
]);
angles
[
UR
][
1
]
=
SQRT
(
1
-
angles
[
UR
][
0
]
*
angles
[
UR
][
0
]);
angles
[
US
][
0
]
=
dot
(
vector
[
U
],
vector
[
S
]);
angles
[
US
][
1
]
=
SQRT
(
1
-
angles
[
US
][
0
]
*
angles
[
US
][
0
]);
angles
[
VS
][
0
]
=
dot
(
vector
[
V
],
vector
[
S
]);
angles
[
VS
][
1
]
=
SQRT
(
1
-
angles
[
VS
][
0
]
*
angles
[
VS
][
0
]);
angles
[
WS
][
0
]
=
dot
(
vector
[
W
],
vector
[
S
]);
angles
[
WS
][
1
]
=
SQRT
(
1
-
angles
[
WS
][
0
]
*
angles
[
WS
][
0
]);
real3
t1
=
vector
[
V
]
-
vector
[
S
]
*
angles
[
VS
][
0
];
real3
t2
=
vector
[
W
]
-
vector
[
S
]
*
angles
[
WS
][
0
];
normVector
(
t1
);
normVector
(
t2
);
real
ut1cos
=
dot
(
vector
[
U
],
t1
);
real
ut1sin
=
SQRT
(
1
-
ut1cos
*
ut1cos
);
real
ut2cos
=
dot
(
vector
[
U
],
t2
);
real
ut2sin
=
SQRT
(
1
-
ut2cos
*
ut2cos
);
real
dphiR
=
-
dot
(
vector
[
R
],
torque
);
real
dphiS
=
-
dot
(
vector
[
S
],
torque
);
real
factor1
=
dphiR
/
(
norms
[
U
]
*
angles
[
UR
][
1
]);
real
factor2
=
dphiS
/
(
norms
[
U
]);
real
factor3
=
dphi
[
U
]
/
(
norms
[
V
]
*
(
ut1sin
+
ut2sin
));
real
factor4
=
dphi
[
U
]
/
(
norms
[
W
]
*
(
ut1sin
+
ut2sin
));
forces
[
Z
]
=
vector
[
UR
]
*
factor1
+
factor2
*
vector
[
US
];
forces
[
X
]
=
(
angles
[
VS
][
1
]
*
vector
[
S
]
-
angles
[
VS
][
0
]
*
t1
)
*
factor3
;
forces
[
Y
]
=
(
angles
[
WS
][
1
]
*
vector
[
S
]
-
angles
[
WS
][
0
]
*
t2
)
*
factor4
;
forces
[
I
]
=
-
(
forces
[
X
]
+
forces
[
Y
]
+
forces
[
Z
]);
}
else
if
(
axisType
==
3
)
{
// 3-fold
forces
[
Z
]
=
(
vector
[
UW
]
*
dphi
[
W
]
/
(
norms
[
U
]
*
angles
[
UW
][
1
])
+
vector
[
UV
]
*
dphi
[
V
]
/
(
norms
[
U
]
*
angles
[
UV
][
1
])
-
vector
[
UW
]
*
dphi
[
U
]
/
(
norms
[
U
]
*
angles
[
UW
][
1
])
-
vector
[
UV
]
*
dphi
[
U
]
/
(
norms
[
U
]
*
angles
[
UV
][
1
]))
/
3
;
forces
[
X
]
=
(
vector
[
VW
]
*
dphi
[
W
]
/
(
norms
[
V
]
*
angles
[
VW
][
1
])
-
vector
[
UV
]
*
dphi
[
U
]
/
(
norms
[
V
]
*
angles
[
UV
][
1
])
-
vector
[
VW
]
*
dphi
[
V
]
/
(
norms
[
V
]
*
angles
[
VW
][
1
])
+
vector
[
UV
]
*
dphi
[
V
]
/
(
norms
[
V
]
*
angles
[
UV
][
1
]))
/
3
;
forces
[
Y
]
=
(
-
vector
[
UW
]
*
dphi
[
U
]
/
(
norms
[
W
]
*
angles
[
UW
][
1
])
-
vector
[
VW
]
*
dphi
[
V
]
/
(
norms
[
W
]
*
angles
[
VW
][
1
])
+
vector
[
UW
]
*
dphi
[
W
]
/
(
norms
[
W
]
*
angles
[
UW
][
1
])
+
vector
[
VW
]
*
dphi
[
W
]
/
(
norms
[
W
]
*
angles
[
VW
][
1
]))
/
3
;
forces
[
I
]
=
-
(
forces
[
X
]
+
forces
[
Y
]
+
forces
[
Z
]);
}
else
if
(
axisType
==
4
)
{
// z-only
forces
[
Z
]
=
vector
[
UV
]
*
dphi
[
V
]
/
(
norms
[
U
]
*
angles
[
UV
][
1
]);
forces
[
X
]
=
make_float3
(
0
);
forces
[
Y
]
=
make_float3
(
0
);
forces
[
I
]
=
-
forces
[
Z
];
}
else
{
forces
[
Z
]
=
make_float3
(
0
);
forces
[
X
]
=
make_float3
(
0
);
forces
[
Y
]
=
make_float3
(
0
);
forces
[
I
]
=
make_float3
(
0
);
}
// Store results
atomicAdd
(
&
forceBuffers
[
particles
.
z
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
Z
].
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
particles
.
z
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
Z
].
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
particles
.
z
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
Z
].
z
*
0xFFFFFFFF
)));
if
(
axisType
!=
4
)
{
atomicAdd
(
&
forceBuffers
[
particles
.
x
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
X
].
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
particles
.
x
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
X
].
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
particles
.
x
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
X
].
z
*
0xFFFFFFFF
)));
}
if
((
axisType
==
2
||
axisType
==
3
)
&&
particles
.
y
>
-
1
)
{
atomicAdd
(
&
forceBuffers
[
particles
.
y
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
Y
].
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
particles
.
y
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
Y
].
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
particles
.
y
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
Y
].
z
*
0xFFFFFFFF
)));
}
atomicAdd
(
&
forceBuffers
[
atom
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
I
].
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
I
].
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forces
[
I
].
z
*
0xFFFFFFFF
)));
}
}
}
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
0 → 100644
View file @
e5b7f0b3
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment