Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
222b3bb4
Commit
222b3bb4
authored
Aug 01, 2016
by
peastman
Browse files
Began CUDA version of GayBerneForce
parent
a381a3ab
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1073 additions
and
1 deletion
+1073
-1
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+68
-0
platforms/cuda/src/CudaKernelFactory.cpp
platforms/cuda/src/CudaKernelFactory.cpp
+3
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+425
-0
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+1
-0
platforms/cuda/src/kernels/gayBerne.cu
platforms/cuda/src/kernels/gayBerne.cu
+540
-0
platforms/cuda/tests/TestCudaGayBerneForce.cpp
platforms/cuda/tests/TestCudaGayBerneForce.cpp
+36
-0
No files found.
platforms/cuda/include/CudaKernels.h
View file @
222b3bb4
...
...
@@ -1104,6 +1104,74 @@ private:
CUevent
event
;
};
/**
 * This kernel is invoked by GayBerneForce to calculate the forces acting on the system.
 */
class CudaCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public:
    /**
     * Create the kernel.  Every device array pointer starts out NULL; the arrays
     * themselves are allocated later by initialize().
     *
     * @param name      passed through to the CalcGayBerneForceKernel base class
     * @param platform  passed through to the CalcGayBerneForceKernel base class
     * @param cu        the CudaContext this kernel keeps a reference to
     */
    CudaCalcGayBerneForceKernel(std::string name, const Platform& platform, CudaContext& cu) :
            CalcGayBerneForceKernel(name, platform),
            cu(cu),
            hasInitializedKernels(false),
            sortedParticles(NULL),
            axisParticleIndices(NULL),
            sigParams(NULL),
            epsParams(NULL),
            scale(NULL),
            exceptionParticles(NULL),
            exceptionParams(NULL),
            aMatrix(NULL),
            bMatrix(NULL),
            gMatrix(NULL),
            exclusions(NULL),
            exclusionStartIndex(NULL),
            blockCenter(NULL),
            blockBoundingBox(NULL),
            neighbors(NULL),
            neighborIndex(NULL),
            neighborBlockCount(NULL),
            sortedPos(NULL),
            torque(NULL) {
    }

    /**
     * Release the device arrays owned by this kernel.
     */
    ~CudaCalcGayBerneForceKernel();

    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the GayBerneForce this kernel will be used for
     */
    void initialize(const System& system, const GayBerneForce& force);

    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);

    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the GayBerneForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const GayBerneForce& force);

private:
    class ReorderListener;

    /**
     * Rebuild the sorted particle list and the exception/exclusion tables.
     */
    void sortAtoms();

    CudaContext& cu;
    // Set on the first call to execute(), once the kernel argument lists have been built.
    bool hasInitializedKernels;
    int numRealParticles;
    int numExceptions;
    int maxNeighborBlocks;
    GayBerneForce::NonbondedMethod nonbondedMethod;

    // Device arrays.  The declaration order matches the constructor's initializer list.
    CudaArray *sortedParticles, *axisParticleIndices;
    CudaArray *sigParams, *epsParams, *scale;
    CudaArray *exceptionParticles, *exceptionParams;
    CudaArray *aMatrix, *bMatrix, *gMatrix;
    CudaArray *exclusions, *exclusionStartIndex;
    CudaArray *blockCenter, *blockBoundingBox;
    CudaArray *neighbors, *neighborIndex, *neighborBlockCount;
    CudaArray *sortedPos;
    CudaArray *torque;

    // Host-side bookkeeping filled in by initialize().
    std::vector<bool> isRealParticle;
    std::vector<std::pair<int, int> > exceptionAtoms;
    std::vector<std::pair<int, int> > excludedPairs;

    // Argument lists for each kernel launch, built on the first call to execute().
    std::vector<void*> framesArgs;
    std::vector<void*> blockBoundsArgs;
    std::vector<void*> neighborsArgs;
    std::vector<void*> forceArgs;
    std::vector<void*> torqueArgs;

    CUfunction framesKernel;
    CUfunction blockBoundsKernel;
    CUfunction neighborsKernel;
    CUfunction forceKernel;
    CUfunction torqueKernel;
    CUevent event;
};
/**
* This kernel is invoked by VerletIntegrator to take one time step.
*/
...
...
platforms/cuda/src/CudaKernelFactory.cpp
View file @
222b3bb4
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -110,6 +110,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return
new
CudaCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
return
new
CudaCalcCustomManyParticleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
return
new
CudaCalcGayBerneForceKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
return
new
CudaIntegrateVerletStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
222b3bb4
...
...
@@ -5988,6 +5988,431 @@ void CudaCalcCustomManyParticleForceKernel::copyParametersToContext(ContextImpl&
cu
.
invalidateMolecules
();
}
class
CudaGayBerneForceInfo
:
public
CudaForceInfo
{
public:
CudaGayBerneForceInfo
(
const
GayBerneForce
&
force
)
:
force
(
force
)
{
}
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
int
xparticle1
,
yparticle1
;
double
sigma1
,
epsilon1
,
sx1
,
sy1
,
sz1
,
ex1
,
ey1
,
ez1
;
int
xparticle2
,
yparticle2
;
double
sigma2
,
epsilon2
,
sx2
,
sy2
,
sz2
,
ex2
,
ey2
,
ez2
;
force
.
getParticleParameters
(
particle1
,
sigma1
,
epsilon1
,
xparticle1
,
yparticle1
,
sx1
,
sy1
,
sz1
,
ex1
,
ey1
,
ez1
);
force
.
getParticleParameters
(
particle2
,
sigma2
,
epsilon2
,
xparticle2
,
yparticle2
,
sx2
,
sy2
,
sz2
,
ex2
,
ey2
,
ez2
);
return
(
sigma1
==
sigma2
&&
epsilon1
==
epsilon2
&&
sx1
==
sx2
&&
sy1
==
sy2
&&
sz1
==
sz2
&&
ex1
==
ex2
&&
ey1
==
ey2
&&
ez1
==
ez2
);
}
int
getNumParticleGroups
()
{
return
force
.
getNumExceptions
()
+
force
.
getNumParticles
();
}
void
getParticlesInGroup
(
int
index
,
vector
<
int
>&
particles
)
{
if
(
index
<
force
.
getNumExceptions
())
{
int
particle1
,
particle2
;
double
sigma
,
epsilon
;
force
.
getExceptionParameters
(
index
,
particle1
,
particle2
,
sigma
,
epsilon
);
particles
.
resize
(
2
);
particles
[
0
]
=
particle1
;
particles
[
1
]
=
particle2
;
}
else
{
int
particle
=
index
-
force
.
getNumExceptions
();
int
xparticle
,
yparticle
;
double
sigma
,
epsilon
,
sx
,
sy
,
sz
,
ex
,
ey
,
ez
;
force
.
getParticleParameters
(
particle
,
sigma
,
epsilon
,
xparticle
,
yparticle
,
sx
,
sy
,
sz
,
ex
,
ey
,
ez
);
particles
.
clear
();
particles
.
push_back
(
particle
);
if
(
xparticle
>
-
1
)
particles
.
push_back
(
xparticle
);
if
(
yparticle
>
-
1
)
particles
.
push_back
(
yparticle
);
}
}
bool
areGroupsIdentical
(
int
group1
,
int
group2
)
{
if
(
group1
<
force
.
getNumExceptions
()
&&
group2
<
force
.
getNumExceptions
())
{
int
particle1
,
particle2
;
double
sigma1
,
sigma2
,
epsilon1
,
epsilon2
;
force
.
getExceptionParameters
(
group1
,
particle1
,
particle2
,
sigma1
,
epsilon1
);
force
.
getExceptionParameters
(
group2
,
particle1
,
particle2
,
sigma2
,
epsilon2
);
return
(
sigma1
==
sigma2
&&
epsilon1
==
epsilon2
);
}
return
true
;
}
private:
const
GayBerneForce
&
force
;
};
/**
 * Reorder listener that asks the owning kernel to rebuild its sorted particle
 * list whenever it is executed.
 */
class CudaCalcGayBerneForceKernel::ReorderListener : public CudaContext::ReorderListener {
private:
    CudaCalcGayBerneForceKernel& kernel;

public:
    ReorderListener(CudaCalcGayBerneForceKernel& owner) : kernel(owner) {
    }

    void execute() {
        kernel.sortAtoms();
    }
};
/**
 * Free every device array owned by the kernel.  Pointers that were never
 * allocated are still NULL, and deleting a null pointer does nothing, so no
 * explicit checks are needed.
 */
CudaCalcGayBerneForceKernel::~CudaCalcGayBerneForceKernel() {
    // NOTE(review): the CUevent created in initialize() for cutoff methods is not
    // released here - confirm whether it should be destroyed.
    delete sortedParticles;
    delete axisParticleIndices;
    delete sigParams;
    delete epsParams;
    delete scale;
    delete exceptionParticles;
    delete exceptionParams;
    delete aMatrix;
    delete bMatrix;
    delete gMatrix;
    delete exclusions;
    delete exclusionStartIndex;
    delete blockCenter;
    delete blockBoundingBox;
    delete neighbors;
    delete neighborIndex;
    delete neighborBlockCount;
    delete sortedPos;
    delete torque;
}
void
CudaCalcGayBerneForceKernel
::
initialize
(
const
System
&
system
,
const
GayBerneForce
&
force
)
{
// Initialize interactions.
int
numParticles
=
force
.
getNumParticles
();
sigParams
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"sigParams"
);
epsParams
=
CudaArray
::
create
<
float2
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"epsParams"
);
scale
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"scale"
);
axisParticleIndices
=
CudaArray
::
create
<
int2
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"axisParticleIndices"
);
sortedParticles
=
CudaArray
::
create
<
int
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"sortedParticles"
);
aMatrix
=
CudaArray
::
create
<
float
>
(
cu
,
9
*
cu
.
getPaddedNumAtoms
(),
"aMatrix"
);
bMatrix
=
CudaArray
::
create
<
float
>
(
cu
,
9
*
cu
.
getPaddedNumAtoms
(),
"bMatrix"
);
gMatrix
=
CudaArray
::
create
<
float
>
(
cu
,
9
*
cu
.
getPaddedNumAtoms
(),
"gMatrix"
);
vector
<
float4
>
sigParamsVector
(
cu
.
getPaddedNumAtoms
(),
make_float4
(
0
,
0
,
0
,
0
));
vector
<
float2
>
epsParamsVector
(
cu
.
getPaddedNumAtoms
(),
make_float2
(
0
,
0
));
vector
<
float4
>
scaleVector
(
cu
.
getPaddedNumAtoms
(),
make_float4
(
0
,
0
,
0
,
0
));
vector
<
int2
>
axisParticleVector
(
cu
.
getPaddedNumAtoms
(),
make_int2
(
0
,
0
));
isRealParticle
.
resize
(
cu
.
getPaddedNumAtoms
());
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
int
xparticle
,
yparticle
;
double
sigma
,
epsilon
,
sx
,
sy
,
sz
,
ex
,
ey
,
ez
;
force
.
getParticleParameters
(
i
,
sigma
,
epsilon
,
xparticle
,
yparticle
,
sx
,
sy
,
sz
,
ex
,
ey
,
ez
);
axisParticleVector
[
i
]
=
make_int2
(
xparticle
,
yparticle
);
sigParamsVector
[
i
]
=
make_float4
((
float
)
(
0.5
*
sigma
),
(
float
)
(
0.25
*
sx
*
sx
),
(
float
)
(
0.25
*
sy
*
sy
),
(
float
)
(
0.25
*
sz
*
sz
));
epsParamsVector
[
i
]
=
make_float2
((
float
)
sqrt
(
epsilon
),
(
float
)
(
0.125
*
(
sx
*
sy
+
sz
*
sz
)
*
sqrt
(
sx
*
sy
)));
scaleVector
[
i
]
=
make_float4
((
float
)
(
1
/
sqrt
(
ex
)),
(
float
)
(
1
/
sqrt
(
ey
)),
(
float
)
(
1
/
sqrt
(
ez
)),
0
);
isRealParticle
[
i
]
=
(
epsilon
!=
0.0
);
}
sigParams
->
upload
(
sigParamsVector
);
epsParams
->
upload
(
epsParamsVector
);
scale
->
upload
(
scaleVector
);
axisParticleIndices
->
upload
(
axisParticleVector
);
// Record exceptions and exclusions.
vector
<
float2
>
exceptionParamsVec
;
for
(
int
i
=
0
;
i
<
force
.
getNumExceptions
();
i
++
)
{
int
particle1
,
particle2
;
double
sigma
,
epsilon
;
force
.
getExceptionParameters
(
i
,
particle1
,
particle2
,
sigma
,
epsilon
);
if
(
epsilon
!=
0.0
)
{
exceptionParamsVec
.
push_back
(
make_float2
((
float
)
sigma
,
(
float
)
epsilon
));
exceptionAtoms
.
push_back
(
make_pair
(
particle1
,
particle2
));
isRealParticle
[
particle1
]
=
true
;
isRealParticle
[
particle2
]
=
true
;
}
if
(
isRealParticle
[
particle1
]
&&
isRealParticle
[
particle2
])
excludedPairs
.
push_back
(
pair
<
int
,
int
>
(
particle1
,
particle2
));
}
numRealParticles
=
0
;
for
(
int
i
=
0
;
i
<
isRealParticle
.
size
();
i
++
)
if
(
isRealParticle
[
i
])
numRealParticles
++
;
numExceptions
=
exceptionParamsVec
.
size
();
exclusions
=
CudaArray
::
create
<
int
>
(
cu
,
max
(
1
,
(
int
)
excludedPairs
.
size
()),
"exclusions"
);
exclusionStartIndex
=
CudaArray
::
create
<
int
>
(
cu
,
numRealParticles
+
1
,
"exclusionStartIndex"
);
exceptionParticles
=
CudaArray
::
create
<
int4
>
(
cu
,
max
(
1
,
numExceptions
),
"exceptionParticles"
);
exceptionParams
=
CudaArray
::
create
<
float2
>
(
cu
,
max
(
1
,
numExceptions
),
"exceptionParams"
);
if
(
numExceptions
>
0
)
exceptionParams
->
upload
(
exceptionParamsVec
);
// Create data structures used for the neighbor list.
int
numAtomBlocks
=
(
numRealParticles
+
31
)
/
32
;
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
blockCenter
=
new
CudaArray
(
cu
,
numAtomBlocks
,
4
*
elementSize
,
"blockCenter"
);
blockBoundingBox
=
new
CudaArray
(
cu
,
numAtomBlocks
,
4
*
elementSize
,
"blockBoundingBox"
);
sortedPos
=
new
CudaArray
(
cu
,
numRealParticles
,
4
*
elementSize
,
"sortedPos"
);
maxNeighborBlocks
=
numRealParticles
*
2
;
neighbors
=
CudaArray
::
create
<
int
>
(
cu
,
maxNeighborBlocks
*
32
,
"neighbors"
);
neighborIndex
=
CudaArray
::
create
<
int
>
(
cu
,
maxNeighborBlocks
,
"neighbors"
);
neighborBlockCount
=
CudaArray
::
create
<
int
>
(
cu
,
1
,
"neighborBlockCount"
);
if
(
force
.
getNonbondedMethod
()
!=
GayberneForce
::
NoCutoff
)
CHECK_RESULT
(
cuEventCreate
(
&
event
,
CU_EVENT_DISABLE_TIMING
),
"Error creating event for CustomManyParticleForce"
);
// Create array for accumulating torques.
torque
=
CudaArray
::
create
<
long
long
>
(
cu
,
3
*
cu
.
getPaddedNumAtoms
(),
"torque"
);
cu
.
addAutoclearBuffer
(
*
torque
);
// Create the kernels.
nonbondedMethod
=
force
.
getNonbondedMethod
();
bool
useCutoff
=
(
nonbondedMethod
!=
GayBerneForce
::
NoCutoff
);
bool
usePeriodic
=
(
nonbondedMethod
==
GayBerneForce
::
CutoffPeriodic
);
map
<
string
,
string
>
defines
;
defines
[
"USE_SWITCH"
]
=
(
useCutoff
&&
force
.
getUseSwitchingFunction
()
?
"1"
:
"0"
);
double
cutoff
=
force
.
getCutoffDistance
();
defines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
cutoff
*
cutoff
);
if
(
useCutoff
)
{
defines
[
"USE_CUTOFF"
]
=
1
;
if
(
usePeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
// Compute the switching coefficients.
if
(
force
.
getUseSwitchingFunction
())
{
defines
[
"SWITCH_CUTOFF"
]
=
cu
.
doubleToString
(
force
.
getSwitchingDistance
());
defines
[
"SWITCH_C3"
]
=
cu
.
doubleToString
(
10
/
pow
(
force
.
getSwitchingDistance
()
-
cutoff
,
3.0
));
defines
[
"SWITCH_C4"
]
=
cu
.
doubleToString
(
15
/
pow
(
force
.
getSwitchingDistance
()
-
cutoff
,
4.0
));
defines
[
"SWITCH_C5"
]
=
cu
.
doubleToString
(
6
/
pow
(
force
.
getSwitchingDistance
()
-
cutoff
,
5.0
));
}
}
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudakernelSources
::
vectorOps
+
CudaKernelSources
::
gayBerne
,
defines
);
framesKernel
=
cu
.
getKernel
(
module
,
"computeEllipsoidFrames"
);
blockBoundsKernel
=
cu
.
getKernel
(
module
,
"findBlockBounds"
);
neighborsKernel
=
cu
.
getKernel
(
module
,
"findNeighbors"
);
forceKernel
=
cu
.
getKernel
(
module
,
"computeForce"
);
torqueKernel
=
cu
.
getKernel
(
module
,
"applyTorques"
);
cu
.
addForce
(
new
CudaGayBerneForceInfo
(
force
));
cu
.
addReorderListener
(
new
ReorderListener
(
*
this
));
}
/**
 * Launch the GayBerne kernels: ellipsoid frames, block bounds, (for cutoff
 * methods) the neighbor list, the force kernel and finally the torque kernel.
 * The argument lists are built once, on the first call.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  true if forces should be calculated (not read here)
 * @param includeEnergy  true if the energy should be calculated (not read here)
 * @return always 0.0; the force kernel is handed the context's energy buffer
 */
double CudaCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
        sortAtoms();
        framesArgs.push_back(&numRealParticles);
        framesArgs.push_back(&cu.getPosq().getDevicePointer());
        framesArgs.push_back(&axisParticleIndices->getDevicePointer());
        framesArgs.push_back(&sigParams->getDevicePointer());
        framesArgs.push_back(&scale->getDevicePointer());
        framesArgs.push_back(&aMatrix->getDevicePointer());
        framesArgs.push_back(&bMatrix->getDevicePointer());
        framesArgs.push_back(&gMatrix->getDevicePointer());
        framesArgs.push_back(&sortedParticles->getDevicePointer());
        blockBoundsArgs.push_back(&numRealParticles);
        blockBoundsArgs.push_back(cu.getPeriodicBoxSizePointer());
        blockBoundsArgs.push_back(cu.getInvPeriodicBoxSizePointer());
        blockBoundsArgs.push_back(cu.getPeriodicBoxVecXPointer());
        blockBoundsArgs.push_back(cu.getPeriodicBoxVecYPointer());
        blockBoundsArgs.push_back(cu.getPeriodicBoxVecZPointer());
        blockBoundsArgs.push_back(&sortedParticles->getDevicePointer());
        blockBoundsArgs.push_back(&cu.getPosq().getDevicePointer());
        blockBoundsArgs.push_back(&sortedPos->getDevicePointer());
        blockBoundsArgs.push_back(&blockCenter->getDevicePointer());
        blockBoundsArgs.push_back(&blockBoundingBox->getDevicePointer());
        blockBoundsArgs.push_back(&neighborBlockCount->getDevicePointer());
        neighborsArgs.push_back(&numRealParticles);
        neighborsArgs.push_back(&maxNeighborBlocks);
        neighborsArgs.push_back(cu.getPeriodicBoxSizePointer());
        neighborsArgs.push_back(cu.getInvPeriodicBoxSizePointer());
        neighborsArgs.push_back(cu.getPeriodicBoxVecXPointer());
        neighborsArgs.push_back(cu.getPeriodicBoxVecYPointer());
        neighborsArgs.push_back(cu.getPeriodicBoxVecZPointer());
        neighborsArgs.push_back(&sortedPos->getDevicePointer());
        neighborsArgs.push_back(&blockCenter->getDevicePointer());
        neighborsArgs.push_back(&blockBoundingBox->getDevicePointer());
        neighborsArgs.push_back(&neighbors->getDevicePointer());       // index 10, patched on reallocation
        neighborsArgs.push_back(&neighborIndex->getDevicePointer());   // index 11, patched on reallocation
        neighborsArgs.push_back(&neighborBlockCount->getDevicePointer());
        neighborsArgs.push_back(&exclusions->getDevicePointer());
        neighborsArgs.push_back(&exclusionStartIndex->getDevicePointer());
        forceArgs.push_back(&cu.getLongForceBuffer().getDevicePointer());
        forceArgs.push_back(&torque->getDevicePointer());
        forceArgs.push_back(&numRealParticles);
        forceArgs.push_back(&numExceptions);
        forceArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
        forceArgs.push_back(&sortedPos->getDevicePointer());
        forceArgs.push_back(&sigParams->getDevicePointer());
        forceArgs.push_back(&epsParams->getDevicePointer());
        forceArgs.push_back(&sortedParticles->getDevicePointer());
        forceArgs.push_back(&aMatrix->getDevicePointer());
        forceArgs.push_back(&bMatrix->getDevicePointer());
        forceArgs.push_back(&gMatrix->getDevicePointer());
        forceArgs.push_back(&exclusions->getDevicePointer());
        forceArgs.push_back(&exclusionStartIndex->getDevicePointer());
        forceArgs.push_back(&exceptionParticles->getDevicePointer());
        forceArgs.push_back(&exceptionParams->getDevicePointer());
        if (nonbondedMethod != GayBerneForce::NoCutoff) {
            forceArgs.push_back(&maxNeighborBlocks);
            forceArgs.push_back(&neighbors->getDevicePointer());       // index 17, patched on reallocation
            forceArgs.push_back(&neighborIndex->getDevicePointer());   // index 18, patched on reallocation
            forceArgs.push_back(&neighborBlockCount->getDevicePointer());
            forceArgs.push_back(cu.getPeriodicBoxSizePointer());
            forceArgs.push_back(cu.getInvPeriodicBoxSizePointer());
            forceArgs.push_back(cu.getPeriodicBoxVecXPointer());
            forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
            forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
        }
        torqueArgs.push_back(&cu.getLongForceBuffer().getDevicePointer());
        torqueArgs.push_back(&torque->getDevicePointer());
        torqueArgs.push_back(&numRealParticles);
        torqueArgs.push_back(&cu.getPosq().getDevicePointer());
        torqueArgs.push_back(&axisParticleIndices->getDevicePointer());
        torqueArgs.push_back(&sortedParticles->getDevicePointer());
    }
    const int numAtomBlocks = (numRealParticles+31)/32;
    const int forceThreads = cu.getNonbondedUtilities().getNumForceThreadBlocks()*cu.getNonbondedUtilities().getForceThreadBlockSize();
    cu.executeKernel(framesKernel, &framesArgs[0], numRealParticles);
    cu.executeKernel(blockBoundsKernel, &blockBoundsArgs[0], numAtomBlocks);
    if (nonbondedMethod == GayBerneForce::NoCutoff) {
        cu.executeKernel(forceKernel, &forceArgs[0], forceThreads);
    }
    else {
        while (true) {
            cu.executeKernel(neighborsKernel, &neighborsArgs[0], numRealParticles);
            int* count = (int*) cu.getPinnedBuffer();
            neighborBlockCount->download(count, false);
            // The download above is asynchronous.  Record the event behind it so
            // the cuEventSynchronize() below really waits for the count to
            // arrive; synchronizing on an event that was never recorded returns
            // immediately.
            // NOTE(review): recorded on the default stream - switch to the
            // context's own stream if the download is issued on a different one.
            CHECK_RESULT(cuEventRecord(event, 0), "Error recording event for GayBerneForce");
            cu.executeKernel(forceKernel, &forceArgs[0], forceThreads);
            CHECK_RESULT(cuEventSynchronize(event), "Error synchronizing on event for GayBerneForce");
            if (*count <= maxNeighborBlocks)
                break;

            // There wasn't enough room for the neighbor list, so we need to recreate it.
            // NOTE(review): the force kernel has already been launched against the
            // overflowed list at this point - confirm it does nothing in that case.

            delete neighbors;
            neighbors = NULL;
            delete neighborIndex;
            neighborIndex = NULL;
            maxNeighborBlocks = (int) ceil((*count)*1.1);
            neighbors = CudaArray::create<int>(cu, maxNeighborBlocks*32, "neighbors");
            neighborIndex = CudaArray::create<int>(cu, maxNeighborBlocks, "neighborIndex");
            neighborsArgs[10] = &neighbors->getDevicePointer();
            neighborsArgs[11] = &neighborIndex->getDevicePointer();
            forceArgs[17] = &neighbors->getDevicePointer();
            forceArgs[18] = &neighborIndex->getDevicePointer();

            // findNeighbors only ever adds to neighborBlockCount; findBlockBounds is
            // the kernel that zeroes it.  Run it again so the retry starts counting
            // from zero instead of piling on top of the previous pass.

            cu.executeKernel(blockBoundsKernel, &blockBoundsArgs[0], numAtomBlocks);
        }
    }
    cu.executeKernel(torqueKernel, &torqueArgs[0], numRealParticles);
    return 0.0;
}
/**
 * Re-upload the per-particle and per-exception parameters after the
 * GayBerneForce has been modified.  Only parameter values may change: the
 * number of particles, the set of ignored particles and the set of exceptions
 * with non-zero epsilon must match what initialize() recorded.
 *
 * @param context  the context to copy parameters to (not read here)
 * @param force    the GayBerneForce to copy the parameters from
 * @throws OpenMMException if the structure of the force has changed
 */
void CudaCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context, const GayBerneForce& force) {
    // Make sure the new parameters are acceptable.

    if (force.getNumParticles() != cu.getNumAtoms())
        throw OpenMMException("updateParametersInContext: The number of particles has changed");
    vector<int> exceptions;
    for (int i = 0; i < force.getNumExceptions(); i++) {
        int particle1, particle2;
        double sigma, epsilon;
        force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
        // The stored exceptions must reappear in the same order; anything else
        // is only tolerated when its epsilon is zero.
        if (exceptionAtoms.size() > exceptions.size() && make_pair(particle1, particle2) == exceptionAtoms[exceptions.size()])
            exceptions.push_back(i);
        else if (epsilon != 0.0)
            throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
    }
    // Every stored exception has to be matched.  Without this check the loop
    // further down would index past the end of `exceptions` whenever the force
    // now defines fewer of them.
    if (exceptions.size() != exceptionAtoms.size())
        throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
    int numExceptions = exceptionAtoms.size();

    // Record the per-particle parameters.

    vector<float4> sigParamsVector(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
    vector<float2> epsParamsVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
    vector<float4> scaleVector(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
    for (int i = 0; i < force.getNumParticles(); i++) {
        int xparticle, yparticle;
        double sigma, epsilon, sx, sy, sz, ex, ey, ez;
        force.getParticleParameters(i, sigma, epsilon, xparticle, yparticle, sx, sy, sz, ex, ey, ez);
        sigParamsVector[i] = make_float4((float) (0.5*sigma), (float) (0.25*sx*sx), (float) (0.25*sy*sy), (float) (0.25*sz*sz));
        epsParamsVector[i] = make_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
        scaleVector[i] = make_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
        if (epsilon != 0.0 && !isRealParticle[i])
            throw OpenMMException("updateParametersInContext: The set of ignored particles (ones with epsilon=0) has changed");
    }
    sigParams->upload(sigParamsVector);
    epsParams->upload(epsParamsVector);
    scale->upload(scaleVector);

    // Record the exceptions.

    if (numExceptions > 0) {
        vector<float2> exceptionParamsVec(numExceptions);
        for (int i = 0; i < numExceptions; i++) {
            int atom1, atom2;
            double sigma, epsilon;
            force.getExceptionParameters(exceptions[i], atom1, atom2, sigma, epsilon);
            exceptionParamsVec[i] = make_float2((float) sigma, (float) epsilon);
        }
        exceptionParams->upload(exceptionParamsVec);
    }
    cu.invalidateMolecules();
    sortAtoms();
}
void
CudaCalcGayBerneForceKernel
::
sortAtoms
()
{
// Sort the list of atoms by type to avoid thread divergence. This is executed every time
// the atoms are reordered.
int
nextIndex
=
0
;
vector
<
int
>
particles
(
cu
.
getPaddedNumAtoms
(),
0
);
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
vector
<
int
>
inverseOrder
(
order
.
size
(),
-
1
);
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
();
i
++
)
{
int
atom
=
order
[
i
];
if
(
isRealParticle
[
atom
])
{
inverseOrder
[
atom
]
=
nextIndex
;
particles
[
nextIndex
++
]
=
atom
;
}
}
sortedParticles
->
upload
(
particles
);
// Update the list of exception particles.
int
numExceptions
=
exceptionAtoms
.
size
();
if
(
numExceptions
>
0
)
{
vector
<
int4
>
exceptionParticlesVec
(
numExceptions
);
for
(
int
i
=
0
;
i
<
numExceptions
;
i
++
)
exceptionParticlesVec
[
i
]
=
make_int4
(
exceptionAtoms
[
i
].
first
,
exceptionAtoms
[
i
].
second
,
inverseOrder
[
exceptionAtoms
[
i
].
first
],
inverseOrder
[
exceptionAtoms
[
i
].
second
]);
exceptionParticles
->
upload
(
exceptionParticlesVec
);
}
// Rebuild the list of exclusions.
vector
<
vector
<
int
>
>
excludedAtoms
(
numRealParticles
);
for
(
int
i
=
0
;
i
<
excludedPairs
.
size
();
i
++
)
{
int
first
=
inverseOrder
[
min
(
excludedPairs
[
i
].
first
,
excludedPairs
[
i
].
second
)];
int
second
=
inverseOrder
[
max
(
excludedPairs
[
i
].
first
,
excludedPairs
[
i
].
second
)];
excludedAtoms
[
first
].
push_back
(
second
);
}
int
index
=
0
;
vector
<
int
>
exclusionVec
(
exclusions
->
getSize
());
vector
<
int
>
startIndexVec
(
exclusionStartIndex
->
getSize
());
for
(
int
i
=
0
;
i
<
numRealParticles
;
i
++
)
{
startIndexVec
[
i
]
=
index
;
for
(
int
j
=
0
;
j
<
excludedAtoms
[
i
].
size
();
j
++
)
exclusionVec
[
index
++
]
=
excludedAtoms
[
i
][
j
];
}
startIndexVec
[
numRealParticles
]
=
index
;
exclusions
->
upload
(
exclusionVec
);
exclusionStartIndex
->
upload
(
startIndexVec
);
}
// Destructor for CudaIntegrateVerletStepKernel; the body is empty.
CudaIntegrateVerletStepKernel
::~
CudaIntegrateVerletStepKernel
()
{
}
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
222b3bb4
...
...
@@ -92,6 +92,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateBrownianStepKernel
::
Name
(),
factory
);
...
...
platforms/cuda/src/kernels/gayBerne.cu
0 → 100644
View file @
222b3bb4
// Number of consecutive sorted atoms that findBlockBounds() and findNeighbors() treat as one block.
#define TILE_SIZE 32
// Capacity of the per-thread neighbor buffer, and the length of each block stored in the neighbor list.
#define NEIGHBOR_BLOCK_SIZE 32
/**
 * Calculate the ellipsoid coordinate frames and associated matrices.
 *
 * For every sorted particle this builds an orthonormal frame (xdir, ydir, zdir)
 * from the positions of its axis particles and writes three 3x3 matrices, nine
 * values per particle, at offset sortedIndex*9:
 *   a - the frame itself, one direction per row
 *   b - sum over k of a[k][i]*e2[k]*a[k][j], with e2 taken from scale
 *   g - sum over k of a[k][i]*r2[k]*a[k][j], with r2 taken from sigParams (y, z, w)
 *
 * An axis index of -1 means "no axis particle": a missing x axis selects the
 * global x and y directions, a missing y axis selects whichever of the global
 * y or x directions is further from xdir.
 */
extern "C" __global__ void computeEllipsoidFrames(int numParticles, const real4* __restrict__ posq, int2* const __restrict__ axisParticleIndices,
        const float4* __restrict__ sigParams, const float4* __restrict__ scale, real* __restrict__ aMatrix,
        real* __restrict__ bMatrix, real* __restrict__ gMatrix, const int* sortedParticles) {
    for (int sortedIndex = blockIdx.x*blockDim.x+threadIdx.x; sortedIndex < numParticles; sortedIndex += blockDim.x*gridDim.x) {
        // Compute the local coordinate system of the ellipsoid;

        int originalIndex = sortedParticles[sortedIndex];
        real3 pos = trimTo3(posq[originalIndex]);
        int2 axisParticles = axisParticleIndices[originalIndex];
        real3 xdir, ydir, zdir;
        if (axisParticles.x == -1) {
            xdir = make_real3(1, 0, 0);
            ydir = make_real3(0, 1, 0);
        }
        else {
            xdir = pos-trimTo3(posq[axisParticles.x]);
            xdir = normalize(xdir);
            if (axisParticles.y == -1) {
                if (xdir.y > -0.5f && xdir.y < 0.5f)
                    ydir = make_real3(0, 1, 0);
                else
                    ydir = make_real3(1, 0, 0);
            }
            else
                ydir = pos-trimTo3(posq[axisParticles.y]);
            // Remove the component along xdir so the two directions are orthogonal.
            ydir -= xdir*dot(xdir, ydir);
            ydir = normalize(ydir);
        }
        zdir = cross(xdir, ydir);

        // Compute matrices we will need later.

        real (*a)[3] = (real (*)[3]) (aMatrix+sortedIndex*9);
        real (*b)[3] = (real (*)[3]) (bMatrix+sortedIndex*9);
        real (*g)[3] = (real (*)[3]) (gMatrix+sortedIndex*9);
        a[0][0] = xdir.x;
        a[0][1] = xdir.y;
        a[0][2] = xdir.z;
        a[1][0] = ydir.x;
        a[1][1] = ydir.y;
        a[1][2] = ydir.z;
        a[2][0] = zdir.x;
        a[2][1] = zdir.y;
        a[2][2] = zdir.z;
        // CUDA's built-in vector types have no swizzle members such as .yzw, so
        // the three-component values are assembled explicitly.
        float4 sig = sigParams[originalIndex];
        float3 r2 = make_float3(sig.y, sig.z, sig.w);
        float4 particleScale = scale[originalIndex];
        float3 e2 = make_float3(particleScale.x, particleScale.y, particleScale.z);
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++) {
                b[i][j] = a[0][i]*e2.x*a[0][j] + a[1][i]*e2.y*a[1][j] + a[2][i]*e2.z*a[2][j];
                g[i][j] = a[0][i]*r2.x*a[0][j] + a[1][i]*r2.y*a[1][j] + a[2][i]*r2.z*a[2][j];
            }
    }
}
/**
 * Find a bounding box for the atoms in each block.
 *
 * Each thread handles whole blocks of TILE_SIZE consecutive sorted atoms.  It
 * copies their positions into sortedPos, then stores the block's center and
 * half-extent.  The thread with global index 0 also resets the neighbor block
 * counter to zero.
 */
extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        const int* sortedAtoms, const real4* __restrict__ posq, real4* __restrict__ sortedPos, real4* __restrict__ blockCenter,
        real4* __restrict__ blockBoundingBox, int* __restrict__ neighborBlockCount) {
    const int globalThread = blockIdx.x*blockDim.x+threadIdx.x;
    for (int block = globalThread; block*TILE_SIZE < numAtoms; block += blockDim.x*gridDim.x) {
        const int firstAtom = block*TILE_SIZE;
        const int endAtom = min(firstAtom+TILE_SIZE, numAtoms);

        // The first atom of the block seeds both corners of the box.

        real4 pos = posq[sortedAtoms[firstAtom]];
        sortedPos[firstAtom] = pos;
#ifdef USE_PERIODIC
        APPLY_PERIODIC_TO_POS(pos)
#endif
        real4 lower = pos;
        real4 upper = pos;

        // Grow the box to take in the remaining atoms of the block.

        for (int atom = firstAtom+1; atom < endAtom; atom++) {
            pos = posq[sortedAtoms[atom]];
            sortedPos[atom] = pos;
#ifdef USE_PERIODIC
            real4 center = 0.5f*(upper+lower);
            APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
#endif
            lower = min(lower, pos);
            upper = max(upper, pos);
        }
        blockBoundingBox[block] = 0.5f*(upper-lower);
        blockCenter[block] = 0.5f*(upper+lower);
    }
    if (globalThread == 0)
        *neighborBlockCount = 0;
}
/**
* This is called by findNeighbors() to write a block to the neighbor list.
*/
void
storeNeighbors
(
int
atom1
,
int
*
neighborBuffer
,
int
numAtomsInBuffer
,
int
maxNeighborBlocks
,
int
*
__restrict__
neighbors
,
int
*
__restrict__
neighborIndex
,
int
*
__restrict__
neighborBlockCount
)
{
int
blockIndex
=
atomicAdd
(
neighborBlockCount
,
1
);
if
(
blockIndex
>=
maxNeighborBlocks
)
return
;
// We don't have enough room for the neighbor list.
neighborIndex
[
blockIndex
]
=
atom1
;
int
baseIndex
=
blockIndex
*
NEIGHBOR_BLOCK_SIZE
;
for
(
int
i
=
0
;
i
<
numAtomsInBuffer
;
i
++
)
neighbors
[
baseIndex
+
i
]
=
neighborBuffer
[
i
];
for
(
int
i
=
numAtomsInBuffer
;
i
<
NEIGHBOR_BLOCK_SIZE
;
i
++
)
neighbors
[
baseIndex
+
i
]
=
-
1
;
}
/**
 * Build a list of neighbors for each atom.
 *
 * Each thread takes one sorted atom (atom1) at a time and looks only at atoms
 * with a larger sorted index.  Whole blocks are rejected first using their
 * bounding boxes; surviving atoms that lie within the cutoff and are not
 * excluded are collected in a local buffer, which is flushed to the neighbor
 * list through storeNeighbors() whenever it fills up and once more at the end.
 *
 * The exclusions of atom1 are the entries
 * exclusions[exclusionStartIndex[atom1] .. exclusionStartIndex[atom1+1]-1].
 */
extern "C" __global__ void findNeighbors(int numAtoms, int maxNeighborBlocks, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        real4* __restrict__ sortedPos, real4* __restrict__ blockCenter, real4* __restrict__ blockBoundingBox, int* __restrict__ neighbors,
        int* __restrict__ neighborIndex, int* __restrict__ neighborBlockCount, const int* __restrict__ exclusions, const int* __restrict__ exclusionStartIndex) {
    const int numBlocks = (numAtoms+TILE_SIZE-1)/TILE_SIZE;
    int neighborBuffer[NEIGHBOR_BLOCK_SIZE];
    for (int atom1 = blockIdx.x*blockDim.x+threadIdx.x; atom1 < numAtoms; atom1 += blockDim.x*gridDim.x) {
        int firstExclusion = exclusionStartIndex[atom1];
        int lastExclusion = exclusionStartIndex[atom1+1];
        real4 pos = sortedPos[atom1];
        int nextBufferIndex = 0;

        // Loop over atom blocks and compute the distance of this atom from each one's bounding box.

        for (int block = (atom1+1)/TILE_SIZE; block < numBlocks; block++) {
            real4 center = blockCenter[block];
            real4 blockSize = blockBoundingBox[block];
            real4 blockDelta = center-pos;
#ifdef USE_PERIODIC
            APPLY_PERIODIC_TO_DELTA(blockDelta)
#endif
            blockDelta.x = max((real) 0, fabs(blockDelta.x)-blockSize.x);
            blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSize.y);
            blockDelta.z = max((real) 0, fabs(blockDelta.z)-blockSize.z);
            if (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z >= CUTOFF_SQUARED)
                continue;

            // Loop over atoms within this block.

            int first = max(block*TILE_SIZE, atom1+1);
            int last = min((block+1)*TILE_SIZE, numAtoms);
            for (int atom2 = first; atom2 < last; atom2++) {
                // Skip over excluded interactions.  Each candidate is compared
                // against every exclusion of atom1 for an exact match.  The
                // earlier single ">=" test against a running cursor dropped
                // atoms that were not excluded and kept ones that were, and it
                // could not cope with blocks being skipped.

                bool isExcluded = false;
                for (int exclusion = firstExclusion; exclusion < lastExclusion; exclusion++) {
                    if (exclusions[exclusion] == atom2) {
                        isExcluded = true;
                        break;
                    }
                }
                if (isExcluded)
                    continue;
                real4 delta = pos-sortedPos[atom2];
#ifdef USE_PERIODIC
                APPLY_PERIODIC_TO_DELTA(delta)
#endif
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                if (r2 < CUTOFF_SQUARED) {
                    neighborBuffer[nextBufferIndex++] = atom2;
                    if (nextBufferIndex == NEIGHBOR_BLOCK_SIZE) {
                        storeNeighbors(atom1, neighborBuffer, nextBufferIndex, maxNeighborBlocks, neighbors, neighborIndex, neighborBlockCount);
                        nextBufferIndex = 0;
                    }
                }
            }
        }

        // Flush whatever is left in the buffer for this atom.

        if (nextBufferIndex > 0)
            storeNeighbors(atom1, neighborBuffer, nextBufferIndex, maxNeighborBlocks, neighbors, neighborIndex, neighborBlockCount);
    }
}
/**
 * The per-atom data needed to evaluate one interaction.  It is filled in by
 * loadAtomData() and read by computeForce() and computeOneInteraction().
 */
struct AtomData {
    // sig.x is summed over the two atoms to give sigma for a pair; sig.y, sig.z and sig.w
    // scale the three rows of the matrix used for the eta torque term.
    float4 sig;
    // eps.x is multiplied over the two atoms to give epsilon for a pair; eps.y enters eta.
    float2 eps;
    // The atom position, with the fourth component of the stored real4 dropped.
    real3 pos;
    // 3x3 matrices copied from the aMatrix, bMatrix and gMatrix arrays.
    real a[3][3];
    real b[3][3];
    real g[3][3];
};
/**
 * Gather everything that is needed about one atom into an AtomData.
 *
 * The sigma and epsilon parameters are looked up with originalIndex, while the
 * position and the three matrices are looked up with sortedIndex.  Each matrix
 * array stores nine consecutive values per atom in row-major order.
 *
 * @param data           the structure to fill in
 * @param sortedIndex    index used for pos, aMatrix, bMatrix and gMatrix
 * @param originalIndex  index used for sigParams and epsParams
 */
void loadAtomData(AtomData* data, int sortedIndex, int originalIndex, const real4* __restrict__ pos,
        const float4* __restrict__ sigParams, const float2* __restrict__ epsParams, const real* __restrict__ aMatrix,
        const real* __restrict__ bMatrix, const real* __restrict__ gMatrix) {
    data->sig = sigParams[originalIndex];
    data->eps = epsParams[originalIndex];
    data->pos = trimTo3(pos[sortedIndex]);
    const int base = 9*sortedIndex;
    for (int element = 0; element < 9; element++) {
        const int row = element/3;
        const int col = element%3;
        data->a[row][col] = aMatrix[base+element];
        data->b[row][col] = bMatrix[base+element];
        data->g[row][col] = gMatrix[base+element];
    }
}
/**
 * Compute the product m*v of a 3x3 matrix and a column vector.
 */
real3 matrixVectorProduct(real (*m)[3], real3 v) {
    // Each component is the dot product of one matrix row with v.
    const real x = m[0][0]*v.x + m[0][1]*v.y + m[0][2]*v.z;
    const real y = m[1][0]*v.x + m[1][1]*v.y + m[1][2]*v.z;
    const real z = m[2][0]*v.x + m[2][1]*v.y + m[2][2]*v.z;
    return make_real3(x, y, z);
}
/**
 * Compute the product v*m of a row vector and a 3x3 matrix.
 */
real3 vectorMatrixProduct(real3 v, real (*m)[3]) {
    // Each component is the dot product of v with one matrix column.
    const real x = m[0][0]*v.x + m[1][0]*v.y + m[2][0]*v.z;
    const real y = m[0][1]*v.x + m[1][1]*v.y + m[2][1]*v.z;
    const real z = m[0][2]*v.x + m[1][2]*v.y + m[2][2]*v.z;
    return make_real3(x, y, z);
}
/**
 * Compute the element-wise sum of two 3x3 matrices: result = a+b.
 */
void matrixSum(real (*result)[3], real (*a)[3], real (*b)[3]) {
    for (int row = 0; row < 3; row++)
        for (int col = 0; col < 3; col++)
            result[row][col] = a[row][col]+b[row][col];
}
/**
 * Compute the determinant of a 3x3 matrix.
 */
real determinant(real (*m)[3]) {
    // The three products that are added...
    const real plus0 = m[0][0]*m[1][1]*m[2][2];
    const real plus1 = m[0][1]*m[1][2]*m[2][0];
    const real plus2 = m[0][2]*m[1][0]*m[2][1];

    // ...and the three that are subtracted.
    const real minus0 = m[0][0]*m[1][2]*m[2][1];
    const real minus1 = m[0][1]*m[1][0]*m[2][2];
    const real minus2 = m[0][2]*m[1][1]*m[2][0];
    return (plus0 + plus1 + plus2 - minus0 - minus1 - minus2);
}
/**
 * Compute the inverse of a 3x3 matrix as its adjugate divided by its determinant.
 *
 * @param result  receives the inverse
 * @param m       the matrix to invert
 */
void matrixInverse(real (*result)[3], real (*m)[3]) {
    const real posScale = RECIP(determinant(m));
    const real negScale = -posScale;

    // Every element is a signed 2x2 minor of m, scaled by 1/det(m).

    result[0][0] = posScale*(m[1][1]*m[2][2] - m[1][2]*m[2][1]);
    result[1][0] = negScale*(m[1][0]*m[2][2] - m[1][2]*m[2][0]);
    result[2][0] = posScale*(m[1][0]*m[2][1] - m[1][1]*m[2][0]);
    result[0][1] = negScale*(m[0][1]*m[2][2] - m[0][2]*m[2][1]);
    result[1][1] = posScale*(m[0][0]*m[2][2] - m[0][2]*m[2][0]);
    result[2][1] = negScale*(m[0][0]*m[2][1] - m[0][1]*m[2][0]);
    result[0][2] = posScale*(m[0][1]*m[1][2] - m[0][2]*m[1][1]);
    result[1][2] = negScale*(m[0][0]*m[1][2] - m[0][2]*m[1][0]);
    result[2][2] = posScale*(m[0][0]*m[1][1] - m[0][1]*m[1][0]);
}
void
computeOneInteraction
(
AtomData
*
data1
,
AtomData
*
data2
,
real
sigma
,
real
epsilon
,
real3
dr
,
real
r2
,
real3
*
force1
,
real3
*
force2
,
real3
*
torque1
,
real3
*
torque2
,
real
*
totalEnergy
)
{
real
rInv
=
RSQRT
(
r2
);
real
r
=
r2
*
rInv
;
real3
drUnit
=
dr
*
rInv
;
// Compute the switching function.
real
switchValue
=
1
,
switchDeriv
=
0
;
#if USE_SWITCH
if
(
r
>
SWITCH_CUTOFF
)
{
real
x
=
r
-
SWITCH_CUTOFF
;
switchValue
=
1
+
x
*
x
*
x
*
(
SWITCH_C3
+
x
*
(
SWITCH_C4
+
x
*
SWITCH_C5
));
switchDeriv
=
x
*
x
*
(
3
*
SWITCH_C3
+
x
*
(
4
*
SWITCH_C4
+
x
*
5
*
SWITCH_C5
));
}
#endif
// Compute vectors and matrices we'll be needing.
real
B12
[
3
][
3
],
G12
[
3
][
3
],
B12inv
[
3
][
3
],
G12inv
[
3
][
3
];
matrixSum
(
B12
,
data1
->
b
,
data2
->
b
);
matrixSum
(
G12
,
data1
->
g
,
data2
->
g
);
matrixInverse
(
B12inv
,
B12
);
matrixInverse
(
G12inv
,
G12
);
real
detG12
=
determinant
(
G12
);
// Estimate the distance between the ellipsoids and compute the first terms needed for the energy.
real
sigma12
=
1
/
SQRT
(
0.5
f
*
dot
(
drUnit
,
matrixVectorProduct
(
G12inv
,
drUnit
)));
real
h12
=
r
-
sigma12
;
real
rho
=
sigma
/
(
h12
+
sigma
);
real
rho2
=
rho
*
rho
;
real
rho6
=
rho2
*
rho2
*
rho2
;
real
u
=
4
*
epsilon
*
(
rho6
*
rho6
-
rho6
);
real
eta
=
SQRT
(
2
*
data1
->
eps
.
y
*
data2
->
eps
.
y
/
detG12
);
real
chi
=
2
*
dot
(
drUnit
,
matrixVectorProduct
(
B12inv
,
drUnit
));
chi
*=
chi
;
real
energy
=
u
*
eta
*
chi
;
// Compute the terms needed for the force.
real3
kappa
=
matrixVectorProduct
(
G12inv
,
dr
);
real3
iota
=
matrixVectorProduct
(
B12inv
,
dr
);
real
rInv2
=
rInv
*
rInv
;
real
dUSLJdr
=
24
*
epsilon
*
(
2
*
rho6
-
1
)
*
rho6
*
rho
/
sigma
;
real
temp
=
0.5
f
*
sigma12
*
sigma12
*
sigma12
*
rInv2
;
real3
dudr
=
(
drUnit
+
(
kappa
-
drUnit
*
dot
(
kappa
,
drUnit
))
*
temp
)
*
dUSLJdr
;
real3
dchidr
=
(
iota
-
drUnit
*
dot
(
iota
,
drUnit
))
*
(
-
8
*
rInv2
*
SQRT
(
chi
));
real3
force
=
(
dchidr
*
u
+
dudr
*
chi
)
*
(
eta
*
switchValue
)
-
drUnit
*
(
energy
*
switchDeriv
);
*
force1
+=
force
;
*
force2
-=
force
;
// Compute the terms needed for the torque.
for
(
int
j
=
0
;
j
<
2
;
j
++
)
{
real
(
*
a
)[
3
]
=
(
j
==
0
?
data1
->
a
:
data2
->
a
);
real
(
*
b
)[
3
]
=
(
j
==
0
?
data1
->
b
:
data2
->
b
);
real
(
*
g
)[
3
]
=
(
j
==
0
?
data1
->
g
:
data2
->
g
);
float4
sig
=
(
j
==
0
?
data1
->
sig
:
data2
->
sig
);
real3
dudq
=
cross
(
vectorMatrixProduct
(
kappa
,
g
),
kappa
*
(
temp
*
dUSLJdr
));
real3
dchidq
=
cross
(
vectorMatrixProduct
(
iota
,
b
),
iota
)
*
(
-
4
*
rInv2
);
real3
scale
=
make_real3
(
sig
.
y
,
sig
.
z
,
sig
.
w
)
*
(
-
0.5
f
*
eta
/
detG12
);
real
d
[
3
][
3
];
d
[
0
][
0
]
=
scale
.
x
*
(
2
*
a
[
0
][
0
]
*
(
G12
[
1
][
1
]
*
G12
[
2
][
2
]
-
G12
[
1
][
2
]
*
G12
[
2
][
1
])
+
a
[
0
][
2
]
*
(
G12
[
1
][
2
]
*
G12
[
0
][
1
]
+
G12
[
1
][
0
]
*
G12
[
2
][
1
]
-
G12
[
1
][
1
]
*
(
G12
[
0
][
2
]
+
G12
[
2
][
0
]))
+
a
[
0
][
1
]
*
(
G12
[
0
][
2
]
*
G12
[
2
][
1
]
+
G12
[
2
][
0
]
*
G12
[
1
][
2
]
-
G12
[
2
][
2
]
*
(
G12
[
0
][
1
]
+
G12
[
1
][
0
])));
d
[
0
][
1
]
=
scale
.
x
*
(
a
[
0
][
0
]
*
(
G12
[
0
][
2
]
*
G12
[
2
][
1
]
+
G12
[
2
][
0
]
*
G12
[
1
][
2
]
-
G12
[
2
][
2
]
*
(
G12
[
0
][
1
]
+
G12
[
1
][
0
]))
+
2
*
a
[
0
][
1
]
*
(
G12
[
0
][
0
]
*
G12
[
2
][
2
]
-
G12
[
2
][
0
]
*
G12
[
0
][
2
])
+
a
[
0
][
2
]
*
(
G12
[
1
][
0
]
*
G12
[
0
][
2
]
+
G12
[
2
][
0
]
*
G12
[
0
][
1
]
-
G12
[
0
][
0
]
*
(
G12
[
1
][
2
]
+
G12
[
2
][
1
])));
d
[
0
][
2
]
=
scale
.
x
*
(
a
[
0
][
0
]
*
(
G12
[
0
][
1
]
*
G12
[
1
][
2
]
+
G12
[
1
][
0
]
*
G12
[
2
][
1
]
-
G12
[
1
][
1
]
*
(
G12
[
0
][
2
]
+
G12
[
2
][
0
]))
+
a
[
0
][
1
]
*
(
G12
[
1
][
0
]
*
G12
[
0
][
2
]
+
G12
[
2
][
0
]
*
G12
[
0
][
1
]
-
G12
[
0
][
0
]
*
(
G12
[
1
][
2
]
+
G12
[
2
][
1
]))
+
2
*
a
[
0
][
2
]
*
(
G12
[
1
][
1
]
*
G12
[
0
][
0
]
-
G12
[
1
][
0
]
*
G12
[
0
][
1
]));
d
[
1
][
0
]
=
scale
.
y
*
(
2
*
a
[
1
][
0
]
*
(
G12
[
1
][
1
]
*
G12
[
2
][
2
]
-
G12
[
1
][
2
]
*
G12
[
2
][
1
])
+
a
[
1
][
1
]
*
(
G12
[
0
][
2
]
*
G12
[
2
][
1
]
+
G12
[
2
][
0
]
*
G12
[
1
][
2
]
-
G12
[
2
][
2
]
*
(
G12
[
0
][
1
]
+
G12
[
1
][
0
]))
+
a
[
1
][
2
]
*
(
G12
[
1
][
2
]
*
G12
[
0
][
1
]
+
G12
[
1
][
0
]
*
G12
[
2
][
1
]
-
G12
[
1
][
1
]
*
(
G12
[
0
][
2
]
+
G12
[
2
][
0
])));
d
[
1
][
1
]
=
scale
.
y
*
(
a
[
1
][
0
]
*
(
G12
[
0
][
2
]
*
G12
[
2
][
1
]
+
G12
[
2
][
0
]
*
G12
[
1
][
2
]
-
G12
[
2
][
2
]
*
(
G12
[
0
][
1
]
+
G12
[
1
][
0
]))
+
2
*
a
[
1
][
1
]
*
(
G12
[
2
][
2
]
*
G12
[
0
][
0
]
-
G12
[
2
][
0
]
*
G12
[
0
][
2
])
+
a
[
1
][
2
]
*
(
G12
[
1
][
0
]
*
G12
[
0
][
2
]
+
G12
[
0
][
1
]
*
G12
[
2
][
0
]
-
G12
[
0
][
0
]
*
(
G12
[
1
][
2
]
+
G12
[
2
][
1
])));
d
[
1
][
2
]
=
scale
.
y
*
(
a
[
1
][
0
]
*
(
G12
[
0
][
1
]
*
G12
[
1
][
2
]
+
G12
[
1
][
0
]
*
G12
[
2
][
1
]
-
G12
[
1
][
1
]
*
(
G12
[
0
][
2
]
+
G12
[
2
][
0
]))
+
a
[
1
][
1
]
*
(
G12
[
1
][
0
]
*
G12
[
0
][
2
]
+
G12
[
0
][
1
]
*
G12
[
2
][
0
]
-
G12
[
0
][
0
]
*
(
G12
[
1
][
2
]
+
G12
[
2
][
1
]))
+
2
*
a
[
1
][
2
]
*
(
G12
[
1
][
1
]
*
G12
[
0
][
0
]
-
G12
[
1
][
0
]
*
G12
[
0
][
1
]));
d
[
2
][
0
]
=
scale
.
z
*
(
2
*
a
[
2
][
0
]
*
(
G12
[
1
][
1
]
*
G12
[
2
][
2
]
-
G12
[
2
][
1
]
*
G12
[
1
][
2
])
+
a
[
2
][
1
]
*
(
G12
[
0
][
2
]
*
G12
[
2
][
1
]
+
G12
[
1
][
2
]
*
G12
[
2
][
0
]
-
G12
[
2
][
2
]
*
(
G12
[
0
][
1
]
+
G12
[
1
][
0
]))
+
a
[
2
][
2
]
*
(
G12
[
0
][
1
]
*
G12
[
1
][
2
]
+
G12
[
2
][
1
]
*
G12
[
1
][
0
]
-
G12
[
1
][
1
]
*
(
G12
[
0
][
2
]
+
G12
[
2
][
0
])));
d
[
2
][
1
]
=
scale
.
z
*
(
a
[
2
][
0
]
*
(
G12
[
0
][
2
]
*
G12
[
2
][
1
]
+
G12
[
1
][
2
]
*
G12
[
2
][
0
]
-
G12
[
2
][
2
]
*
(
G12
[
0
][
1
]
+
G12
[
1
][
0
]))
+
2
*
a
[
2
][
1
]
*
(
G12
[
0
][
0
]
*
G12
[
2
][
2
]
-
G12
[
0
][
2
]
*
G12
[
2
][
0
])
+
a
[
2
][
2
]
*
(
G12
[
1
][
0
]
*
G12
[
0
][
2
]
+
G12
[
0
][
1
]
*
G12
[
2
][
0
]
-
G12
[
0
][
0
]
*
(
G12
[
1
][
2
]
+
G12
[
2
][
1
])));
d
[
2
][
2
]
=
scale
.
z
*
(
a
[
2
][
0
]
*
(
G12
[
0
][
1
]
*
G12
[
1
][
2
]
+
G12
[
2
][
1
]
*
G12
[
1
][
0
]
-
G12
[
1
][
1
]
*
(
G12
[
0
][
2
]
+
G12
[
2
][
0
]))
+
a
[
2
][
1
]
*
(
G12
[
1
][
0
]
*
G12
[
0
][
2
]
+
G12
[
2
][
0
]
*
G12
[
0
][
1
]
-
G12
[
0
][
0
]
*
(
G12
[
1
][
2
]
+
G12
[
2
][
1
]))
+
2
*
a
[
2
][
2
]
*
(
G12
[
1
][
1
]
*
G12
[
0
][
0
]
-
G12
[
1
][
0
]
*
G12
[
0
][
1
]));
real3
detadq
=
0
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
detadq
+=
cross
(
make_real3
(
a
[
i
][
0
],
a
[
i
][
1
],
a
[
i
][
2
]),
make_real3
(
d
[
i
][
0
],
d
[
i
][
1
],
d
[
i
][
2
]));
real3
torque
=
(
dchidq
*
(
u
*
eta
)
+
detadq
*
(
u
*
chi
)
+
dudq
*
(
eta
*
chi
))
*
switchValue
;
*
(
j
==
0
?
torque1
:
torque2
)
-=
torque
;
}
*
totalEnergy
+=
switchValue
*
energy
;
}
/**
 * Compute the interactions.
 *
 * When USE_CUTOFF is defined, each thread processes blocks of the neighbor list
 * built by findNeighbors(); otherwise each thread loops over all atoms with a
 * larger sorted index, skipping excluded pairs.  In both cases the exceptions
 * are then evaluated with their own sigma and epsilon.
 *
 * Forces and torques are added to the output buffers with atomicAdd() as 64 bit
 * fixed point values scaled by 0x100000000.  Each component occupies a separate
 * section of PADDED_NUM_ATOMS entries.  The energy computed by this thread is
 * added to its own element of energyBuffer.
 *
 * If the neighbor list overflowed (more blocks were requested than
 * maxNeighborBlocks), the kernel returns without computing anything.
 *
 * NOTE(review): atomicAdd() is called with unsigned long long values; confirm
 * that the element type declared for forceBuffers and torqueBuffers matches.
 */
extern "C" __global__ void computeForce(
        long* __restrict__ forceBuffers, long* __restrict__ torqueBuffers, int numAtoms, int numExceptions,
        mixed* __restrict__ energyBuffer, const real4* __restrict__ pos, const float4* __restrict__ sigParams,
        const float2* __restrict__ epsParams, const int* __restrict__ sortedAtoms, const real* __restrict__ aMatrix,
        const real* __restrict__ bMatrix, const real* __restrict__ gMatrix, const int* __restrict__ exclusions,
        const int* __restrict__ exclusionStartIndex, const int4* __restrict__ exceptionParticles,
        const float2* __restrict__ exceptionParams
#ifdef USE_CUTOFF
        , int maxNeighborBlocks, int* __restrict__ neighbors, int* __restrict__ neighborIndex,
        int* __restrict__ neighborBlockCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#endif
        ) {
    mixed energy = 0;
#ifdef USE_CUTOFF
    const int numBlocks = *neighborBlockCount;
    if (numBlocks > maxNeighborBlocks)
        return; // There wasn't enough memory for the neighbor list.
    for (int block = blockIdx.x*blockDim.x+threadIdx.x; block < numBlocks; block += blockDim.x*gridDim.x) {
        // Load parameters for atom1.

        int atom1 = neighborIndex[block];
        int index1 = sortedAtoms[atom1];
        AtomData data1;
        loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
        real3 force1 = make_real3(0, 0, 0);
        real3 torque1 = make_real3(0, 0, 0);
        for (int indexInBlock = 0; indexInBlock < NEIGHBOR_BLOCK_SIZE; indexInBlock++) {
            // Load parameters for atom2.  Unused entries of a block are marked with -1.

            int atom2 = neighbors[NEIGHBOR_BLOCK_SIZE*block+indexInBlock];
            if (atom2 == -1)
                continue;
            int index2 = sortedAtoms[atom2];
            AtomData data2;
            loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
            real3 force2 = make_real3(0, 0, 0);
            real3 torque2 = make_real3(0, 0, 0);

            // Compute the interaction.

            real3 delta = data1.pos-data2.pos;
#ifdef USE_PERIODIC
            APPLY_PERIODIC_TO_DELTA(delta)
#endif
            real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
            real sigma = data1.sig.x+data2.sig.x;
            real epsilon = data1.eps.x*data2.eps.x;
            computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
            atomicAdd(&forceBuffers[index2], static_cast<unsigned long long>((long long) (force2.x*0x100000000)));
            atomicAdd(&forceBuffers[index2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force2.y*0x100000000)));
            atomicAdd(&forceBuffers[index2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force2.z*0x100000000)));
            atomicAdd(&torqueBuffers[index2], static_cast<unsigned long long>((long long) (torque2.x*0x100000000)));
            atomicAdd(&torqueBuffers[index2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque2.y*0x100000000)));
            atomicAdd(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque2.z*0x100000000)));
        }

        // The contributions to atom1 were summed over the whole block, so write them once.

        atomicAdd(&forceBuffers[index1], static_cast<unsigned long long>((long long) (force1.x*0x100000000)));
        atomicAdd(&forceBuffers[index1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force1.y*0x100000000)));
        atomicAdd(&forceBuffers[index1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force1.z*0x100000000)));
        atomicAdd(&torqueBuffers[index1], static_cast<unsigned long long>((long long) (torque1.x*0x100000000)));
        atomicAdd(&torqueBuffers[index1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque1.y*0x100000000)));
        atomicAdd(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque1.z*0x100000000)));
    }
#else
    for (int atom1 = blockIdx.x*blockDim.x+threadIdx.x; atom1 < numAtoms; atom1 += blockDim.x*gridDim.x) {
        // Load parameters for atom1.

        int index1 = sortedAtoms[atom1];
        AtomData data1;
        loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
        real3 force1 = make_real3(0, 0, 0);
        real3 torque1 = make_real3(0, 0, 0);
        int nextExclusion = exclusionStartIndex[atom1];
        int lastExclusion = exclusionStartIndex[atom1+1];
        for (int atom2 = atom1+1; atom2 < numAtoms; atom2++) {
            // Skip over excluded interactions.

            if (nextExclusion < lastExclusion && exclusions[nextExclusion] == atom2) {
                nextExclusion++;
                continue;
            }

            // Load parameters for atom2.

            int index2 = sortedAtoms[atom2];
            AtomData data2;
            loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
            real3 force2 = make_real3(0, 0, 0);
            real3 torque2 = make_real3(0, 0, 0);

            // Compute the interaction.

            real3 delta = data1.pos-data2.pos;
            real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
            real sigma = data1.sig.x+data2.sig.x;
            real epsilon = data1.eps.x*data2.eps.x;
            computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
            atomicAdd(&forceBuffers[index2], static_cast<unsigned long long>((long long) (force2.x*0x100000000)));
            atomicAdd(&forceBuffers[index2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force2.y*0x100000000)));
            atomicAdd(&forceBuffers[index2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force2.z*0x100000000)));
            atomicAdd(&torqueBuffers[index2], static_cast<unsigned long long>((long long) (torque2.x*0x100000000)));
            atomicAdd(&torqueBuffers[index2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque2.y*0x100000000)));
            atomicAdd(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque2.z*0x100000000)));
        }

        // The contributions to atom1 were summed over all of its partners, so write them once.

        atomicAdd(&forceBuffers[index1], static_cast<unsigned long long>((long long) (force1.x*0x100000000)));
        atomicAdd(&forceBuffers[index1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force1.y*0x100000000)));
        atomicAdd(&forceBuffers[index1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force1.z*0x100000000)));
        atomicAdd(&torqueBuffers[index1], static_cast<unsigned long long>((long long) (torque1.x*0x100000000)));
        atomicAdd(&torqueBuffers[index1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque1.y*0x100000000)));
        atomicAdd(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque1.z*0x100000000)));
    }
#endif

    // Now compute exceptions.  For each one, x and y of the int4 are the indices used for
    // the parameters and the output buffers, while z and w are the indices used for the
    // positions and matrices.

    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numExceptions; index += blockDim.x*gridDim.x) {
        int4 atomIndices = exceptionParticles[index];
        float2 params = exceptionParams[index];
        int index1 = atomIndices.x, index2 = atomIndices.y;
        int atom1 = atomIndices.z, atom2 = atomIndices.w;
        AtomData data1, data2;
        loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
        loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
        real3 force1 = make_real3(0, 0, 0), force2 = make_real3(0, 0, 0);
        real3 torque1 = make_real3(0, 0, 0), torque2 = make_real3(0, 0, 0);
        real3 delta = data1.pos-data2.pos;

        // Use the same periodic correction as the neighbor list loop above.  The box
        // vectors are only passed to this kernel when USE_CUTOFF is defined.

#if defined(USE_CUTOFF) && defined(USE_PERIODIC)
        APPLY_PERIODIC_TO_DELTA(delta)
#endif
        real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
        if (r2 < CUTOFF_SQUARED) {
#endif
            computeOneInteraction(&data1, &data2, params.x, params.y, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
            atomicAdd(&forceBuffers[index1], static_cast<unsigned long long>((long long) (force1.x*0x100000000)));
            atomicAdd(&forceBuffers[index1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force1.y*0x100000000)));
            atomicAdd(&forceBuffers[index1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force1.z*0x100000000)));
            atomicAdd(&forceBuffers[index2], static_cast<unsigned long long>((long long) (force2.x*0x100000000)));
            atomicAdd(&forceBuffers[index2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force2.y*0x100000000)));
            atomicAdd(&forceBuffers[index2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force2.z*0x100000000)));
            atomicAdd(&torqueBuffers[index1], static_cast<unsigned long long>((long long) (torque1.x*0x100000000)));
            atomicAdd(&torqueBuffers[index1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque1.y*0x100000000)));
            atomicAdd(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque1.z*0x100000000)));
            atomicAdd(&torqueBuffers[index2], static_cast<unsigned long long>((long long) (torque2.x*0x100000000)));
            atomicAdd(&torqueBuffers[index2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque2.y*0x100000000)));
            atomicAdd(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (torque2.z*0x100000000)));
#ifdef USE_CUTOFF
        }
#endif
    }
    energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
 * Convert the torques to forces on the connected particles.
 *
 * For every particle that has an x axis particle (axisParticles.x != -1), the
 * accumulated torque is read back from torqueBuffers and replaced by a force on
 * the x particle plus an opposite force on the particle itself.  If there is also
 * a y axis particle, the component of the torque along the x axis direction is
 * applied in the same way through the y particle.
 *
 * Forces are added to forceBuffers as 64 bit fixed point values scaled by
 * 0x100000000, with each component in a separate section of PADDED_NUM_ATOMS
 * entries.
 *
 * @param forceBuffers         the forces to add to
 * @param torqueBuffers        the torques written by computeForce()
 * @param numParticles         the number of entries of sortedParticles to process
 * @param posq                 particle positions, indexed by original particle index
 * @param axisParticleIndices  the x and y axis particle of each particle, or -1 if there is none
 * @param sortedParticles      maps a sorted index to an original particle index
 */
extern "C" __global__ void applyTorques(long* __restrict__ forceBuffers, long* __restrict__ torqueBuffers, int numParticles,
        const real4* __restrict__ posq, int2* const __restrict__ axisParticleIndices, const int* sortedParticles) {
    for (int sortedIndex = blockIdx.x*blockDim.x+threadIdx.x; sortedIndex < numParticles; sortedIndex += blockDim.x*gridDim.x) {
        int originalIndex = sortedParticles[sortedIndex];
        real3 pos = trimTo3(posq[originalIndex]);
        int2 axisParticles = axisParticleIndices[originalIndex];
        if (axisParticles.x != -1) {
            // Load the torque, converting it back from fixed point.

            real scale = 1/(real) 0x100000000;
            real3 torque = make_real3(scale*torqueBuffers[originalIndex], scale*torqueBuffers[originalIndex+PADDED_NUM_ATOMS], scale*torqueBuffers[originalIndex+2*PADDED_NUM_ATOMS]);

            // Apply a force to the x particle.  The opposite force goes on this particle.

            real3 dx = trimTo3(posq[axisParticles.x])-pos;
            real dx2 = dot(dx, dx);
            real3 xforce = cross(torque, dx)/dx2;
            real3 force = make_real3(0, 0, 0);
            force -= xforce;
            if (axisParticles.y != -1) {
                // Apply a force to the y particle.  This is based on the component of the torque
                // that was not already applied to the x particle.

                real3 dy = trimTo3(posq[axisParticles.y])-pos;
                real dy2 = dot(dy, dy);
                real3 torque2 = dx*dot(torque, dx)/dx2;
                real3 yforce = cross(torque2, dy)/dy2;
                force -= yforce;
                atomicAdd(&forceBuffers[axisParticles.y], static_cast<unsigned long long>((long long) (yforce.x*0x100000000)));
                atomicAdd(&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (yforce.y*0x100000000)));
                atomicAdd(&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (yforce.z*0x100000000)));
            }
            atomicAdd(&forceBuffers[originalIndex], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
            atomicAdd(&forceBuffers[originalIndex+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
            atomicAdd(&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
            atomicAdd(&forceBuffers[axisParticles.x], static_cast<unsigned long long>((long long) (xforce.x*0x100000000)));
            atomicAdd(&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (xforce.y*0x100000000)));
            atomicAdd(&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (xforce.z*0x100000000)));
        }
    }
}
platforms/cuda/tests/TestCudaGayBerneForce.cpp
0 → 100644
View file @
222b3bb4
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestGayBerneForce.h"
void
runPlatformTests
()
{
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment