Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
24b68822
"vscode:/vscode.git/clone" did not exist on "caefd490d042ff9afbb8bb0a47b9a542e64a6c38"
Commit
24b68822
authored
Aug 09, 2016
by
peastman
Browse files
Merge branch 'gayberne' of
https://github.com/peastman/openmm
into gayberne
parents
d676ed8e
01440b46
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
34 additions
and
35 deletions
+34
-35
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+7
-6
platforms/cuda/src/kernels/gayBerne.cu
platforms/cuda/src/kernels/gayBerne.cu
+25
-27
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+2
-2
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
24b68822
...
@@ -6161,9 +6161,9 @@ void CudaCalcGayBerneForceKernel::initialize(const System& system, const GayBern
...
@@ -6161,9 +6161,9 @@ void CudaCalcGayBerneForceKernel::initialize(const System& system, const GayBern
sortedPos = new CudaArray(cu, numRealParticles, 4*elementSize, "sortedPos");
sortedPos = new CudaArray(cu, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2;
maxNeighborBlocks = numRealParticles*2;
neighbors = CudaArray::create<int>(cu, maxNeighborBlocks*32, "neighbors");
neighbors = CudaArray::create<int>(cu, maxNeighborBlocks*32, "neighbors");
neighborIndex = CudaArray::create<int>(cu, maxNeighborBlocks, "neighbor
s
");
neighborIndex = CudaArray::create<int>(cu, maxNeighborBlocks, "neighbor
Index
");
neighborBlockCount = CudaArray::create<int>(cu, 1, "neighborBlockCount");
neighborBlockCount = CudaArray::create<int>(cu, 1, "neighborBlockCount");
if (force.getNonbondedMethod() != Gay
b
erneForce::NoCutoff)
if (force.getNonbondedMethod() != Gay
B
erneForce::NoCutoff)
CHECK_RESULT(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "Error creating event for CustomManyParticleForce");
CHECK_RESULT(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "Error creating event for CustomManyParticleForce");
// Create array for accumulating torques.
// Create array for accumulating torques.
...
@@ -6195,7 +6195,7 @@ void CudaCalcGayBerneForceKernel::initialize(const System& system, const GayBern
...
@@ -6195,7 +6195,7 @@ void CudaCalcGayBerneForceKernel::initialize(const System& system, const GayBern
}
}
}
}
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
CUmodule module = cu.createModule(Cuda
k
ernelSources::vectorOps+CudaKernelSources::gayBerne, defines);
CUmodule module = cu.createModule(Cuda
K
ernelSources::vectorOps+CudaKernelSources::gayBerne, defines);
framesKernel = cu.getKernel(module, "computeEllipsoidFrames");
framesKernel = cu.getKernel(module, "computeEllipsoidFrames");
blockBoundsKernel = cu.getKernel(module, "findBlockBounds");
blockBoundsKernel = cu.getKernel(module, "findBlockBounds");
neighborsKernel = cu.getKernel(module, "findNeighbors");
neighborsKernel = cu.getKernel(module, "findNeighbors");
...
@@ -6245,7 +6245,7 @@ double CudaCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -6245,7 +6245,7 @@ double CudaCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeFo
neighborsArgs.push_back(&neighborBlockCount->getDevicePointer());
neighborsArgs.push_back(&neighborBlockCount->getDevicePointer());
neighborsArgs.push_back(&exclusions->getDevicePointer());
neighborsArgs.push_back(&exclusions->getDevicePointer());
neighborsArgs.push_back(&exclusionStartIndex->getDevicePointer());
neighborsArgs.push_back(&exclusionStartIndex->getDevicePointer());
forceArgs.push_back(&cu.get
Long
Force
Buffer
().getDevicePointer());
forceArgs.push_back(&cu.getForce().getDevicePointer());
forceArgs.push_back(&torque->getDevicePointer());
forceArgs.push_back(&torque->getDevicePointer());
forceArgs.push_back(&numRealParticles);
forceArgs.push_back(&numRealParticles);
forceArgs.push_back(&numExceptions);
forceArgs.push_back(&numExceptions);
...
@@ -6272,7 +6272,7 @@ double CudaCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -6272,7 +6272,7 @@ double CudaCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeFo
forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
}
}
torqueArgs.push_back(&cu.get
Long
Force
Buffer
().getDevicePointer());
torqueArgs.push_back(&cu.getForce().getDevicePointer());
torqueArgs.push_back(&torque->getDevicePointer());
torqueArgs.push_back(&torque->getDevicePointer());
torqueArgs.push_back(&numRealParticles);
torqueArgs.push_back(&numRealParticles);
torqueArgs.push_back(&cu.getPosq().getDevicePointer());
torqueArgs.push_back(&cu.getPosq().getDevicePointer());
...
@@ -6289,6 +6289,7 @@ double CudaCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -6289,6 +6289,7 @@ double CudaCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeFo
cu.executeKernel(neighborsKernel, &neighborsArgs[0], numRealParticles);
cu.executeKernel(neighborsKernel, &neighborsArgs[0], numRealParticles);
int* count = (int*) cu.getPinnedBuffer();
int* count = (int*) cu.getPinnedBuffer();
neighborBlockCount->download(count, false);
neighborBlockCount->download(count, false);
CHECK_RESULT(cuEventRecord(event, 0), "Error recording event for GayBerneForce");
cu.executeKernel(forceKernel, &forceArgs[0], cu.getNonbondedUtilities().getNumForceThreadBlocks()*cu.getNonbondedUtilities().getForceThreadBlockSize());
cu.executeKernel(forceKernel, &forceArgs[0], cu.getNonbondedUtilities().getNumForceThreadBlocks()*cu.getNonbondedUtilities().getForceThreadBlockSize());
CHECK_RESULT(cuEventSynchronize(event), "Error synchronizing on event for GayBerneForce");
CHECK_RESULT(cuEventSynchronize(event), "Error synchronizing on event for GayBerneForce");
if (*count <= maxNeighborBlocks)
if (*count <= maxNeighborBlocks)
...
@@ -6302,7 +6303,7 @@ double CudaCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -6302,7 +6303,7 @@ double CudaCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeFo
neighborIndex = NULL;
neighborIndex = NULL;
maxNeighborBlocks = (int) ceil((*count)*1.1);
maxNeighborBlocks = (int) ceil((*count)*1.1);
neighbors = CudaArray::create<int>(cu, maxNeighborBlocks*32, "neighbors");
neighbors = CudaArray::create<int>(cu, maxNeighborBlocks*32, "neighbors");
neighborIndex = CudaArray::create<int>(cu, maxNeighborBlocks, "neighbor
s
");
neighborIndex = CudaArray::create<int>(cu, maxNeighborBlocks, "neighbor
Index
");
neighborsArgs[10] = &neighbors->getDevicePointer();
neighborsArgs[10] = &neighbors->getDevicePointer();
neighborsArgs[11] = &neighborIndex->getDevicePointer();
neighborsArgs[11] = &neighborIndex->getDevicePointer();
forceArgs[17] = &neighbors->getDevicePointer();
forceArgs[17] = &neighbors->getDevicePointer();
...
...
platforms/cuda/src/kernels/gayBerne.cu
View file @
24b68822
...
@@ -49,7 +49,7 @@ extern "C" __global__ void computeEllipsoidFrames(int numParticles, const real4*
...
@@ -49,7 +49,7 @@ extern "C" __global__ void computeEllipsoidFrames(int numParticles, const real4*
a
[
2
][
1
]
=
zdir
.
y
;
a
[
2
][
1
]
=
zdir
.
y
;
a
[
2
][
2
]
=
zdir
.
z
;
a
[
2
][
2
]
=
zdir
.
z
;
float4
sig
=
sigParams
[
originalIndex
];
float4
sig
=
sigParams
[
originalIndex
];
float3
r2
=
sig
.
yzw
;
float3
r2
=
make_float3
(
sig
.
y
,
sig
.
z
,
sig
.
w
)
;
float3
e2
=
trimTo3
(
scale
[
originalIndex
]);
float3
e2
=
trimTo3
(
scale
[
originalIndex
]);
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
...
@@ -83,8 +83,8 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
...
@@ -83,8 +83,8 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
real4
center
=
0.5
f
*
(
maxPos
+
minPos
);
real4
center
=
0.5
f
*
(
maxPos
+
minPos
);
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
pos
,
center
)
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
pos
,
center
)
#endif
#endif
minPos
=
m
in
(
minPos
,
pos
);
minPos
=
m
ake_real4
(
min
(
minPos
.
x
,
pos
.
x
),
min
(
minPos
.
y
,
pos
.
y
),
min
(
minPos
.
z
,
pos
.
z
),
0
);
maxPos
=
ma
x
(
maxPos
,
pos
);
maxPos
=
ma
ke_real4
(
max
(
maxPos
.
x
,
pos
.
x
),
max
(
maxPos
.
y
,
pos
.
y
),
max
(
maxPos
.
z
,
pos
.
z
),
0
);
}
}
real4
blockSize
=
0.5
f
*
(
maxPos
-
minPos
);
real4
blockSize
=
0.5
f
*
(
maxPos
-
minPos
);
blockBoundingBox
[
index
]
=
blockSize
;
blockBoundingBox
[
index
]
=
blockSize
;
...
@@ -99,7 +99,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
...
@@ -99,7 +99,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
/**
/**
* This is called by findNeighbors() to write a block to the neighbor list.
* This is called by findNeighbors() to write a block to the neighbor list.
*/
*/
void
storeNeighbors
(
int
atom1
,
int
*
neighborBuffer
,
int
numAtomsInBuffer
,
int
maxNeighborBlocks
,
int
*
__restrict__
neighbors
,
__device__
void
storeNeighbors
(
int
atom1
,
int
*
neighborBuffer
,
int
numAtomsInBuffer
,
int
maxNeighborBlocks
,
int
*
__restrict__
neighbors
,
int
*
__restrict__
neighborIndex
,
int
*
__restrict__
neighborBlockCount
)
{
int
*
__restrict__
neighborIndex
,
int
*
__restrict__
neighborBlockCount
)
{
int
blockIndex
=
atomicAdd
(
neighborBlockCount
,
1
);
int
blockIndex
=
atomicAdd
(
neighborBlockCount
,
1
);
if
(
blockIndex
>=
maxNeighborBlocks
)
if
(
blockIndex
>=
maxNeighborBlocks
)
...
@@ -178,7 +178,7 @@ typedef struct {
...
@@ -178,7 +178,7 @@ typedef struct {
real
a
[
3
][
3
],
b
[
3
][
3
],
g
[
3
][
3
];
real
a
[
3
][
3
],
b
[
3
][
3
],
g
[
3
][
3
];
}
AtomData
;
}
AtomData
;
void
loadAtomData
(
AtomData
*
data
,
int
sortedIndex
,
int
originalIndex
,
const
real4
*
__restrict__
pos
,
const
float4
*
__restrict__
sigParams
,
__device__
void
loadAtomData
(
AtomData
*
data
,
int
sortedIndex
,
int
originalIndex
,
const
real4
*
__restrict__
pos
,
const
float4
*
__restrict__
sigParams
,
const
float2
*
__restrict__
epsParams
,
const
real
*
__restrict__
aMatrix
,
const
real
*
__restrict__
bMatrix
,
const
real
*
__restrict__
gMatrix
)
{
const
float2
*
__restrict__
epsParams
,
const
real
*
__restrict__
aMatrix
,
const
real
*
__restrict__
bMatrix
,
const
real
*
__restrict__
gMatrix
)
{
data
->
sig
=
sigParams
[
originalIndex
];
data
->
sig
=
sigParams
[
originalIndex
];
data
->
eps
=
epsParams
[
originalIndex
];
data
->
eps
=
epsParams
[
originalIndex
];
...
@@ -192,20 +192,19 @@ void loadAtomData(AtomData* data, int sortedIndex, int originalIndex, const real
...
@@ -192,20 +192,19 @@ void loadAtomData(AtomData* data, int sortedIndex, int originalIndex, const real
}
}
}
}
real3
matrixVectorProduct
(
real
(
*
m
)[
3
],
real3
v
)
{
inline
__device__
real3
matrixVectorProduct
(
real
(
*
m
)[
3
],
real3
v
)
{
return
make_real3
(
m
[
0
][
0
]
*
v
.
x
+
m
[
0
][
1
]
*
v
.
y
+
m
[
0
][
2
]
*
v
.
z
,
return
make_real3
(
m
[
0
][
0
]
*
v
.
x
+
m
[
0
][
1
]
*
v
.
y
+
m
[
0
][
2
]
*
v
.
z
,
m
[
1
][
0
]
*
v
.
x
+
m
[
1
][
1
]
*
v
.
y
+
m
[
1
][
2
]
*
v
.
z
,
m
[
1
][
0
]
*
v
.
x
+
m
[
1
][
1
]
*
v
.
y
+
m
[
1
][
2
]
*
v
.
z
,
m
[
2
][
0
]
*
v
.
x
+
m
[
2
][
1
]
*
v
.
y
+
m
[
2
][
2
]
*
v
.
z
);
m
[
2
][
0
]
*
v
.
x
+
m
[
2
][
1
]
*
v
.
y
+
m
[
2
][
2
]
*
v
.
z
);
}
}
real3
vectorMatrixProduct
(
real3
v
,
real
(
*
m
)[
3
])
{
inline
__device__
real3
vectorMatrixProduct
(
real3
v
,
real
(
*
m
)[
3
])
{
return
make_real3
(
m
[
0
][
0
]
*
v
.
x
+
m
[
1
][
0
]
*
v
.
y
+
m
[
2
][
0
]
*
v
.
z
,
return
make_real3
(
m
[
0
][
0
]
*
v
.
x
+
m
[
1
][
0
]
*
v
.
y
+
m
[
2
][
0
]
*
v
.
z
,
m
[
0
][
1
]
*
v
.
x
+
m
[
1
][
1
]
*
v
.
y
+
m
[
2
][
1
]
*
v
.
z
,
m
[
0
][
1
]
*
v
.
x
+
m
[
1
][
1
]
*
v
.
y
+
m
[
2
][
1
]
*
v
.
z
,
m
[
0
][
2
]
*
v
.
x
+
m
[
1
][
2
]
*
v
.
y
+
m
[
2
][
2
]
*
v
.
z
);
m
[
0
][
2
]
*
v
.
x
+
m
[
1
][
2
]
*
v
.
y
+
m
[
2
][
2
]
*
v
.
z
);
}
}
inline
__device__
void
matrixSum
(
real
(
*
result
)[
3
],
real
(
*
a
)[
3
],
real
(
*
b
)[
3
])
{
void
matrixSum
(
real
(
*
result
)[
3
],
real
(
*
a
)[
3
],
real
(
*
b
)[
3
])
{
result
[
0
][
0
]
=
a
[
0
][
0
]
+
b
[
0
][
0
];
result
[
0
][
0
]
=
a
[
0
][
0
]
+
b
[
0
][
0
];
result
[
0
][
1
]
=
a
[
0
][
1
]
+
b
[
0
][
1
];
result
[
0
][
1
]
=
a
[
0
][
1
]
+
b
[
0
][
1
];
result
[
0
][
2
]
=
a
[
0
][
2
]
+
b
[
0
][
2
];
result
[
0
][
2
]
=
a
[
0
][
2
]
+
b
[
0
][
2
];
...
@@ -217,13 +216,12 @@ void matrixSum(real (*result)[3], real (*a)[3], real (*b)[3]) {
...
@@ -217,13 +216,12 @@ void matrixSum(real (*result)[3], real (*a)[3], real (*b)[3]) {
result
[
2
][
2
]
=
a
[
2
][
2
]
+
b
[
2
][
2
];
result
[
2
][
2
]
=
a
[
2
][
2
]
+
b
[
2
][
2
];
}
}
real
determinant
(
real
(
*
m
)[
3
])
{
inline
__device__
real
determinant
(
real
(
*
m
)[
3
])
{
return
(
m
[
0
][
0
]
*
m
[
1
][
1
]
*
m
[
2
][
2
]
+
m
[
0
][
1
]
*
m
[
1
][
2
]
*
m
[
2
][
0
]
+
m
[
0
][
2
]
*
m
[
1
][
0
]
*
m
[
2
][
1
]
-
return
(
m
[
0
][
0
]
*
m
[
1
][
1
]
*
m
[
2
][
2
]
+
m
[
0
][
1
]
*
m
[
1
][
2
]
*
m
[
2
][
0
]
+
m
[
0
][
2
]
*
m
[
1
][
0
]
*
m
[
2
][
1
]
-
m
[
0
][
0
]
*
m
[
1
][
2
]
*
m
[
2
][
1
]
-
m
[
0
][
1
]
*
m
[
1
][
0
]
*
m
[
2
][
2
]
-
m
[
0
][
2
]
*
m
[
1
][
1
]
*
m
[
2
][
0
]);
m
[
0
][
0
]
*
m
[
1
][
2
]
*
m
[
2
][
1
]
-
m
[
0
][
1
]
*
m
[
1
][
0
]
*
m
[
2
][
2
]
-
m
[
0
][
2
]
*
m
[
1
][
1
]
*
m
[
2
][
0
]);
}
}
inline
__device__
void
matrixInverse
(
real
(
*
result
)[
3
],
real
(
*
m
)[
3
])
{
void
matrixInverse
(
real
(
*
result
)[
3
],
real
(
*
m
)[
3
])
{
real
invDet
=
RECIP
(
determinant
(
m
));
real
invDet
=
RECIP
(
determinant
(
m
));
result
[
0
][
0
]
=
invDet
*
(
m
[
1
][
1
]
*
m
[
2
][
2
]
-
m
[
1
][
2
]
*
m
[
2
][
1
]);
result
[
0
][
0
]
=
invDet
*
(
m
[
1
][
1
]
*
m
[
2
][
2
]
-
m
[
1
][
2
]
*
m
[
2
][
1
]);
result
[
1
][
0
]
=
-
invDet
*
(
m
[
1
][
0
]
*
m
[
2
][
2
]
-
m
[
1
][
2
]
*
m
[
2
][
0
]);
result
[
1
][
0
]
=
-
invDet
*
(
m
[
1
][
0
]
*
m
[
2
][
2
]
-
m
[
1
][
2
]
*
m
[
2
][
0
]);
...
@@ -236,7 +234,7 @@ void matrixInverse(real (*result)[3], real (*m)[3]) {
...
@@ -236,7 +234,7 @@ void matrixInverse(real (*result)[3], real (*m)[3]) {
result
[
2
][
2
]
=
invDet
*
(
m
[
0
][
0
]
*
m
[
1
][
1
]
-
m
[
0
][
1
]
*
m
[
1
][
0
]);
result
[
2
][
2
]
=
invDet
*
(
m
[
0
][
0
]
*
m
[
1
][
1
]
-
m
[
0
][
1
]
*
m
[
1
][
0
]);
}
}
void
computeOneInteraction
(
AtomData
*
data1
,
AtomData
*
data2
,
real
sigma
,
real
epsilon
,
real3
dr
,
real
r2
,
real3
*
force1
,
real3
*
force2
,
real3
*
torque1
,
real3
*
torque2
,
real
*
totalEnergy
)
{
__device__
void
computeOneInteraction
(
AtomData
*
data1
,
AtomData
*
data2
,
real
sigma
,
real
epsilon
,
real3
dr
,
real
r2
,
real3
*
force1
,
real3
*
force2
,
real3
*
torque1
,
real3
*
torque2
,
real
*
totalEnergy
)
{
real
rInv
=
RSQRT
(
r2
);
real
rInv
=
RSQRT
(
r2
);
real
r
=
r2
*
rInv
;
real
r
=
r2
*
rInv
;
real3
drUnit
=
dr
*
rInv
;
real3
drUnit
=
dr
*
rInv
;
...
@@ -325,7 +323,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
...
@@ -325,7 +323,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
d
[
2
][
2
]
=
scale
.
z
*
(
a
[
2
][
0
]
*
(
G12
[
0
][
1
]
*
G12
[
1
][
2
]
+
G12
[
2
][
1
]
*
G12
[
1
][
0
]
-
G12
[
1
][
1
]
*
(
G12
[
0
][
2
]
+
G12
[
2
][
0
]))
+
d
[
2
][
2
]
=
scale
.
z
*
(
a
[
2
][
0
]
*
(
G12
[
0
][
1
]
*
G12
[
1
][
2
]
+
G12
[
2
][
1
]
*
G12
[
1
][
0
]
-
G12
[
1
][
1
]
*
(
G12
[
0
][
2
]
+
G12
[
2
][
0
]))
+
a
[
2
][
1
]
*
(
G12
[
1
][
0
]
*
G12
[
0
][
2
]
+
G12
[
2
][
0
]
*
G12
[
0
][
1
]
-
G12
[
0
][
0
]
*
(
G12
[
1
][
2
]
+
G12
[
2
][
1
]))
+
a
[
2
][
1
]
*
(
G12
[
1
][
0
]
*
G12
[
0
][
2
]
+
G12
[
2
][
0
]
*
G12
[
0
][
1
]
-
G12
[
0
][
0
]
*
(
G12
[
1
][
2
]
+
G12
[
2
][
1
]))
+
2
*
a
[
2
][
2
]
*
(
G12
[
1
][
1
]
*
G12
[
0
][
0
]
-
G12
[
1
][
0
]
*
G12
[
0
][
1
]));
2
*
a
[
2
][
2
]
*
(
G12
[
1
][
1
]
*
G12
[
0
][
0
]
-
G12
[
1
][
0
]
*
G12
[
0
][
1
]));
real3
detadq
=
0
;
real3
detadq
=
make_real3
(
0
)
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
i
=
0
;
i
<
3
;
i
++
)
detadq
+=
cross
(
make_real3
(
a
[
i
][
0
],
a
[
i
][
1
],
a
[
i
][
2
]),
make_real3
(
d
[
i
][
0
],
d
[
i
][
1
],
d
[
i
][
2
]));
detadq
+=
cross
(
make_real3
(
a
[
i
][
0
],
a
[
i
][
1
],
a
[
i
][
2
]),
make_real3
(
d
[
i
][
0
],
d
[
i
][
1
],
d
[
i
][
2
]));
real3
torque
=
(
dchidq
*
(
u
*
eta
)
+
detadq
*
(
u
*
chi
)
+
dudq
*
(
eta
*
chi
))
*
switchValue
;
real3
torque
=
(
dchidq
*
(
u
*
eta
)
+
detadq
*
(
u
*
chi
)
+
dudq
*
(
eta
*
chi
))
*
switchValue
;
...
@@ -338,7 +336,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
...
@@ -338,7 +336,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
* Compute the interactions.
* Compute the interactions.
*/
*/
extern
"C"
__global__
void
computeForce
(
extern
"C"
__global__
void
computeForce
(
long
*
__restrict__
forceBuffers
,
long
*
__restrict__
torqueBuffers
,
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
torqueBuffers
,
int
numAtoms
,
int
numExceptions
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
pos
,
int
numAtoms
,
int
numExceptions
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
pos
,
const
float4
*
__restrict__
sigParams
,
const
float2
*
__restrict__
epsParams
,
const
int
*
__restrict__
sortedAtoms
,
const
float4
*
__restrict__
sigParams
,
const
float2
*
__restrict__
epsParams
,
const
int
*
__restrict__
sortedAtoms
,
const
real
*
__restrict__
aMatrix
,
const
real
*
__restrict__
bMatrix
,
const
real
*
__restrict__
gMatrix
,
const
real
*
__restrict__
aMatrix
,
const
real
*
__restrict__
bMatrix
,
const
real
*
__restrict__
gMatrix
,
...
@@ -362,8 +360,8 @@ extern "C" __global__ void computeForce(
...
@@ -362,8 +360,8 @@ extern "C" __global__ void computeForce(
int
index1
=
sortedAtoms
[
atom1
];
int
index1
=
sortedAtoms
[
atom1
];
AtomData
data1
;
AtomData
data1
;
loadAtomData
(
&
data1
,
atom1
,
index1
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
loadAtomData
(
&
data1
,
atom1
,
index1
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
real3
force1
=
0.0
f
;
real3
force1
=
make_real3
(
0
)
;
real3
torque1
=
0.0
f
;
real3
torque1
=
make_real3
(
0
)
;
for
(
int
indexInBlock
=
0
;
indexInBlock
<
NEIGHBOR_BLOCK_SIZE
;
indexInBlock
++
)
{
for
(
int
indexInBlock
=
0
;
indexInBlock
<
NEIGHBOR_BLOCK_SIZE
;
indexInBlock
++
)
{
// Load parameters for atom2.
// Load parameters for atom2.
...
@@ -373,8 +371,8 @@ extern "C" __global__ void computeForce(
...
@@ -373,8 +371,8 @@ extern "C" __global__ void computeForce(
int
index2
=
sortedAtoms
[
atom2
];
int
index2
=
sortedAtoms
[
atom2
];
AtomData
data2
;
AtomData
data2
;
loadAtomData
(
&
data2
,
atom2
,
index2
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
loadAtomData
(
&
data2
,
atom2
,
index2
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
real3
force2
=
0.0
f
;
real3
force2
=
make_real3
(
0
)
;
real3
torque2
=
0.0
f
;
real3
torque2
=
make_real3
(
0
)
;
// Compute the interaction.
// Compute the interaction.
...
@@ -407,8 +405,8 @@ extern "C" __global__ void computeForce(
...
@@ -407,8 +405,8 @@ extern "C" __global__ void computeForce(
int
index1
=
sortedAtoms
[
atom1
];
int
index1
=
sortedAtoms
[
atom1
];
AtomData
data1
;
AtomData
data1
;
loadAtomData
(
&
data1
,
atom1
,
index1
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
loadAtomData
(
&
data1
,
atom1
,
index1
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
real3
force1
=
0.0
f
;
real3
force1
=
make_real3
(
0
)
;
real3
torque1
=
0.0
f
;
real3
torque1
=
make_real3
(
0
)
;
int
nextExclusion
=
exclusionStartIndex
[
atom1
];
int
nextExclusion
=
exclusionStartIndex
[
atom1
];
int
lastExclusion
=
exclusionStartIndex
[
atom1
+
1
];
int
lastExclusion
=
exclusionStartIndex
[
atom1
+
1
];
for
(
int
atom2
=
atom1
+
1
;
atom2
<
numAtoms
;
atom2
++
)
{
for
(
int
atom2
=
atom1
+
1
;
atom2
<
numAtoms
;
atom2
++
)
{
...
@@ -424,8 +422,8 @@ extern "C" __global__ void computeForce(
...
@@ -424,8 +422,8 @@ extern "C" __global__ void computeForce(
int
index2
=
sortedAtoms
[
atom2
];
int
index2
=
sortedAtoms
[
atom2
];
AtomData
data2
;
AtomData
data2
;
loadAtomData
(
&
data2
,
atom2
,
index2
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
loadAtomData
(
&
data2
,
atom2
,
index2
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
real3
force2
=
0.0
f
;
real3
force2
=
make_real3
(
0
)
;
real3
torque2
=
0.0
f
;
real3
torque2
=
make_real3
(
0
)
;
// Compute the interaction.
// Compute the interaction.
...
@@ -460,8 +458,8 @@ extern "C" __global__ void computeForce(
...
@@ -460,8 +458,8 @@ extern "C" __global__ void computeForce(
AtomData
data1
,
data2
;
AtomData
data1
,
data2
;
loadAtomData
(
&
data1
,
atom1
,
index1
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
loadAtomData
(
&
data1
,
atom1
,
index1
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
loadAtomData
(
&
data2
,
atom2
,
index2
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
loadAtomData
(
&
data2
,
atom2
,
index2
,
pos
,
sigParams
,
epsParams
,
aMatrix
,
bMatrix
,
gMatrix
);
real3
force1
=
0
,
force2
=
0
;
real3
force1
=
make_real3
(
0
),
force2
=
make_real3
(
0
)
;
real3
torque1
=
0
,
torque2
=
0
;
real3
torque1
=
make_real3
(
0
),
torque2
=
make_real3
(
0
)
;
real3
delta
=
data1
.
pos
-
data2
.
pos
;
real3
delta
=
data1
.
pos
-
data2
.
pos
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -491,7 +489,7 @@ extern "C" __global__ void computeForce(
...
@@ -491,7 +489,7 @@ extern "C" __global__ void computeForce(
* Convert the torques to forces on the connected particles.
* Convert the torques to forces on the connected particles.
*/
*/
extern
"C"
__global__
void
applyTorques
(
extern
"C"
__global__
void
applyTorques
(
long
*
__restrict__
forceBuffers
,
long
*
__restrict__
torqueBuffers
,
unsigned
long
long
*
__restrict__
forceBuffers
,
long
long
*
__restrict__
torqueBuffers
,
int
numParticles
,
const
real4
*
__restrict__
posq
,
int2
*
const
__restrict__
axisParticleIndices
,
int
numParticles
,
const
real4
*
__restrict__
posq
,
int2
*
const
__restrict__
axisParticleIndices
,
const
int
*
sortedParticles
)
{
const
int
*
sortedParticles
)
{
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
...
@@ -504,7 +502,7 @@ extern "C" __global__ void applyTorques(
...
@@ -504,7 +502,7 @@ extern "C" __global__ void applyTorques(
real
scale
=
1
/
(
real
)
0x100000000
;
real
scale
=
1
/
(
real
)
0x100000000
;
real3
torque
=
make_real3
(
scale
*
torqueBuffers
[
originalIndex
],
scale
*
torqueBuffers
[
originalIndex
+
PADDED_NUM_ATOMS
],
scale
*
torqueBuffers
[
originalIndex
+
2
*
PADDED_NUM_ATOMS
]);
real3
torque
=
make_real3
(
scale
*
torqueBuffers
[
originalIndex
],
scale
*
torqueBuffers
[
originalIndex
+
PADDED_NUM_ATOMS
],
scale
*
torqueBuffers
[
originalIndex
+
2
*
PADDED_NUM_ATOMS
]);
real3
force
=
0
,
xforce
=
0
,
yforce
=
0
;
real3
force
=
make_real3
(
0
),
xforce
=
make_real3
(
0
),
yforce
=
make_real3
(
0
)
;
// Apply a force to the x particle.
// Apply a force to the x particle.
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
24b68822
...
@@ -6384,7 +6384,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
...
@@ -6384,7 +6384,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2;
maxNeighborBlocks = numRealParticles*2;
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks, "neighbor
s
");
neighborIndex = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks, "neighbor
Index
");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount");
// Create array for accumulating torques.
// Create array for accumulating torques.
...
@@ -6514,7 +6514,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
...
@@ -6514,7 +6514,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
neighborIndex = NULL;
neighborIndex = NULL;
maxNeighborBlocks = (int) ceil((*count)*1.1);
maxNeighborBlocks = (int) ceil((*count)*1.1);
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks, "neighbor
s
");
neighborIndex = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks, "neighbor
Index
");
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(17, neighbors->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(17, neighbors->getDeviceBuffer());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment