Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
59bd8d19
Commit
59bd8d19
authored
Oct 02, 2015
by
peastman
Browse files
Merge pull request #1165 from peastman/mixedenergy
Further improved energy accuracy in mixed precision mode
parents
a20944f6
1f2b65da
Changes
35
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
40 additions
and
40 deletions
+40
-40
platforms/cuda/src/CudaBondedUtilities.cpp
platforms/cuda/src/CudaBondedUtilities.cpp
+2
-2
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-1
platforms/cuda/src/kernels/customCentroidBond.cu
platforms/cuda/src/kernels/customCentroidBond.cu
+2
-2
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+2
-2
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
+2
-2
platforms/cuda/src/kernels/customHbondForce.cu
platforms/cuda/src/kernels/customHbondForce.cu
+3
-3
platforms/cuda/src/kernels/customManyParticle.cu
platforms/cuda/src/kernels/customManyParticle.cu
+2
-2
platforms/cuda/src/kernels/customNonbondedGroups.cu
platforms/cuda/src/kernels/customNonbondedGroups.cu
+2
-2
platforms/cuda/src/kernels/ewald.cu
platforms/cuda/src/kernels/ewald.cu
+2
-2
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+4
-4
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+2
-2
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+3
-3
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+2
-2
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-1
platforms/opencl/src/kernels/customCentroidBond.cl
platforms/opencl/src/kernels/customCentroidBond.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
+2
-2
No files found.
platforms/cuda/src/CudaBondedUtilities.cpp
View file @
59bd8d19
...
@@ -99,7 +99,7 @@ void CudaBondedUtilities::initialize(const System& system) {
...
@@ -99,7 +99,7 @@ void CudaBondedUtilities::initialize(const System& system) {
s
<<
CudaKernelSources
::
vectorOps
;
s
<<
CudaKernelSources
::
vectorOps
;
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
s
<<
prefixCode
[
i
];
s
<<
"extern
\"
C
\"
__global__ void computeBondedForces(unsigned long long* __restrict__ forceBuffer,
real
* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
s
<<
"extern
\"
C
\"
__global__ void computeBondedForces(unsigned long long* __restrict__ forceBuffer,
mixed
* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
force
].
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
force
].
size
();
i
++
)
{
int
indexWidth
=
atomIndices
[
force
][
i
]
->
getElementSize
()
/
4
;
int
indexWidth
=
atomIndices
[
force
][
i
]
->
getElementSize
()
/
4
;
...
@@ -110,7 +110,7 @@ void CudaBondedUtilities::initialize(const System& system) {
...
@@ -110,7 +110,7 @@ void CudaBondedUtilities::initialize(const System& system) {
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
", "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
") {
\n
"
;
s
<<
") {
\n
"
;
s
<<
"
real
energy = 0;
\n
"
;
s
<<
"
mixed
energy = 0;
\n
"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
s
<<
createForceSource
(
force
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
s
<<
createForceSource
(
force
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
s
<<
"energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
\n
"
;
s
<<
"energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
\n
"
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
59bd8d19
...
@@ -368,7 +368,7 @@ void CudaContext::initialize() {
...
@@ -368,7 +368,7 @@ void CudaContext::initialize() {
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
energyBuffer
=
CudaArray
::
create
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
59bd8d19
...
@@ -112,7 +112,7 @@ double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bo
...
@@ -112,7 +112,7 @@ double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bo
cu
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
cu
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
CudaArray
&
energyArray
=
cu
.
getEnergyBuffer
();
CudaArray
&
energyArray
=
cu
.
getEnergyBuffer
();
if
(
cu
.
getUseDoublePrecision
())
{
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
)
{
double
*
energy
=
(
double
*
)
cu
.
getPinnedBuffer
();
double
*
energy
=
(
double
*
)
cu
.
getPinnedBuffer
();
energyArray
.
download
(
energy
);
energyArray
.
download
(
energy
);
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
...
...
platforms/cuda/src/kernels/customCentroidBond.cu
View file @
59bd8d19
...
@@ -104,10 +104,10 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -104,10 +104,10 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
/**
/**
* Compute the forces on groups based on the bonds.
* Compute the forces on groups based on the bonds.
*/
*/
extern
"C"
__global__
void
computeGroupForces
(
unsigned
long
long
*
__restrict__
groupForce
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
centerPositions
,
extern
"C"
__global__
void
computeGroupForces
(
unsigned
long
long
*
__restrict__
groupForce
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
centerPositions
,
const
int
*
__restrict__
bondGroups
const
int
*
__restrict__
bondGroups
EXTRA_ARGS
)
{
EXTRA_ARGS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_BONDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_BONDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
COMPUTE_FORCE
COMPUTE_FORCE
}
}
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
59bd8d19
...
@@ -13,7 +13,7 @@ typedef struct {
...
@@ -13,7 +13,7 @@ typedef struct {
/**
/**
* Compute a force based on pair interactions.
* Compute a force based on pair interactions.
*/
*/
extern
"C"
__global__
void
computeN2Energy
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
extern
"C"
__global__
void
computeN2Energy
(
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
unsigned
int
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
const
real4
*
__restrict__
posq
,
const
unsigned
int
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
...
@@ -27,7 +27,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -27,7 +27,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
...
...
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
View file @
59bd8d19
...
@@ -2,9 +2,9 @@
...
@@ -2,9 +2,9 @@
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
*/
*/
extern
"C"
__global__
void
computePerParticleEnergy
(
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
extern
"C"
__global__
void
computePerParticleEnergy
(
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Load the derivatives
// Load the derivatives
...
...
platforms/cuda/src/kernels/customHbondForce.cu
View file @
59bd8d19
...
@@ -66,12 +66,12 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -66,12 +66,12 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
/**
/**
* Compute forces on donors.
* Compute forces on donors.
*/
*/
extern
"C"
__global__
void
computeDonorForces
(
unsigned
long
long
*
__restrict__
force
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
computeDonorForces
(
unsigned
long
long
*
__restrict__
force
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
exclusions
,
const
int4
*
__restrict__
donorAtoms
,
const
int4
*
__restrict__
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
const
int4
*
__restrict__
exclusions
,
const
int4
*
__restrict__
donorAtoms
,
const
int4
*
__restrict__
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
extern
__shared__
real4
posBuffer
[];
extern
__shared__
real4
posBuffer
[];
real
energy
=
0
;
mixed
energy
=
0
;
real3
f1
=
make_real3
(
0
);
real3
f1
=
make_real3
(
0
);
real3
f2
=
make_real3
(
0
);
real3
f2
=
make_real3
(
0
);
real3
f3
=
make_real3
(
0
);
real3
f3
=
make_real3
(
0
);
...
@@ -155,7 +155,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
...
@@ -155,7 +155,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
/**
/**
* Compute forces on acceptors.
* Compute forces on acceptors.
*/
*/
extern
"C"
__global__
void
computeAcceptorForces
(
unsigned
long
long
*
__restrict__
force
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
computeAcceptorForces
(
unsigned
long
long
*
__restrict__
force
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
exclusions
,
const
int4
*
__restrict__
donorAtoms
,
const
int4
*
__restrict__
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
const
int4
*
__restrict__
exclusions
,
const
int4
*
__restrict__
donorAtoms
,
const
int4
*
__restrict__
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
...
...
platforms/cuda/src/kernels/customManyParticle.cu
View file @
59bd8d19
...
@@ -78,7 +78,7 @@ __constant__ float globals[NUM_GLOBALS];
...
@@ -78,7 +78,7 @@ __constant__ float globals[NUM_GLOBALS];
* Compute the interaction.
* Compute the interaction.
*/
*/
extern
"C"
__global__
void
computeInteraction
(
extern
"C"
__global__
void
computeInteraction
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
neighbors
,
const
int
*
__restrict__
neighborStartIndex
,
const
int
*
__restrict__
neighbors
,
const
int
*
__restrict__
neighborStartIndex
...
@@ -90,7 +90,7 @@ extern "C" __global__ void computeInteraction(
...
@@ -90,7 +90,7 @@ extern "C" __global__ void computeInteraction(
,
int
*
__restrict__
exclusions
,
int
*
__restrict__
exclusionStartIndex
,
int
*
__restrict__
exclusions
,
int
*
__restrict__
exclusionStartIndex
#endif
#endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
.0
f
;
mixed
energy
=
0
;
// Loop over particles to be the first one in the set.
// Loop over particles to be the first one in the set.
...
...
platforms/cuda/src/kernels/customNonbondedGroups.cu
View file @
59bd8d19
...
@@ -9,14 +9,14 @@ typedef struct {
...
@@ -9,14 +9,14 @@ typedef struct {
}
AtomData
;
}
AtomData
;
extern
"C"
__global__
void
computeInteractionGroups
(
extern
"C"
__global__
void
computeInteractionGroups
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
,
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
real
energy
=
0
.0
f
;
mixed
energy
=
0
;
__shared__
AtomData
localData
[
LOCAL_MEMORY_SIZE
];
__shared__
AtomData
localData
[
LOCAL_MEMORY_SIZE
];
const
unsigned
int
startTile
=
FIRST_TILE
+
warp
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
const
unsigned
int
startTile
=
FIRST_TILE
+
warp
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
...
...
platforms/cuda/src/kernels/ewald.cu
View file @
59bd8d19
...
@@ -6,7 +6,7 @@ __device__ real2 multofReal2(real2 a, real2 b) {
...
@@ -6,7 +6,7 @@ __device__ real2 multofReal2(real2 a, real2 b) {
* Precompute the cosine and sine sums which appear in each force term.
* Precompute the cosine and sine sums which appear in each force term.
*/
*/
extern
"C"
__global__
void
calculateEwaldCosSinSums
(
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
real2
*
__restrict__
cosSinSum
,
real4
periodicBoxSize
)
{
extern
"C"
__global__
void
calculateEwaldCosSinSums
(
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
real2
*
__restrict__
cosSinSum
,
real4
periodicBoxSize
)
{
const
unsigned
int
ksizex
=
2
*
KMAX_X
-
1
;
const
unsigned
int
ksizex
=
2
*
KMAX_X
-
1
;
const
unsigned
int
ksizey
=
2
*
KMAX_Y
-
1
;
const
unsigned
int
ksizey
=
2
*
KMAX_Y
-
1
;
const
unsigned
int
ksizez
=
2
*
KMAX_Z
-
1
;
const
unsigned
int
ksizez
=
2
*
KMAX_Z
-
1
;
...
@@ -14,7 +14,7 @@ extern "C" __global__ void calculateEwaldCosSinSums(real* __restrict__ energyBuf
...
@@ -14,7 +14,7 @@ extern "C" __global__ void calculateEwaldCosSinSums(real* __restrict__ energyBuf
real3
reciprocalBoxSize
=
make_real3
(
2
*
M_PI
/
periodicBoxSize
.
x
,
2
*
M_PI
/
periodicBoxSize
.
y
,
2
*
M_PI
/
periodicBoxSize
.
z
);
real3
reciprocalBoxSize
=
make_real3
(
2
*
M_PI
/
periodicBoxSize
.
x
,
2
*
M_PI
/
periodicBoxSize
.
y
,
2
*
M_PI
/
periodicBoxSize
.
z
);
real
reciprocalCoefficient
=
ONE_4PI_EPS0
*
4
*
M_PI
/
(
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
real
reciprocalCoefficient
=
ONE_4PI_EPS0
*
4
*
M_PI
/
(
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
real
energy
=
0
;
mixed
energy
=
0
;
while
(
index
<
(
KMAX_Y
-
1
)
*
ksizez
+
KMAX_Z
)
while
(
index
<
(
KMAX_Y
-
1
)
*
ksizez
+
KMAX_Z
)
index
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
while
(
index
<
totalK
)
{
while
(
index
<
totalK
)
{
...
...
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
59bd8d19
...
@@ -33,9 +33,9 @@ extern "C" __global__ void reduceBornSum(float alpha, float beta, float gamma, c
...
@@ -33,9 +33,9 @@ extern "C" __global__ void reduceBornSum(float alpha, float beta, float gamma, c
* Reduce the Born force.
* Reduce the Born force.
*/
*/
extern
"C"
__global__
void
reduceBornForce
(
long
long
*
__restrict__
bornForce
,
real
*
__restrict__
energyBuffer
,
extern
"C"
__global__
void
reduceBornForce
(
long
long
*
__restrict__
bornForce
,
mixed
*
__restrict__
energyBuffer
,
const
float2
*
__restrict__
params
,
const
real
*
__restrict__
bornRadii
,
const
real
*
__restrict__
obcChain
)
{
const
float2
*
__restrict__
params
,
const
real
*
__restrict__
bornRadii
,
const
real
*
__restrict__
obcChain
)
{
real
energy
=
0
;
mixed
energy
=
0
;
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Get summed Born force
// Get summed Born force
...
@@ -402,7 +402,7 @@ typedef struct {
...
@@ -402,7 +402,7 @@ typedef struct {
*/
*/
extern
"C"
__global__
void
computeGBSAForce1
(
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
global_bornForce
,
extern
"C"
__global__
void
computeGBSAForce1
(
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
global_bornForce
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
global_bornRadii
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
global_bornRadii
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
...
@@ -415,7 +415,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -415,7 +415,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
AtomData2
localData
[
FORCE_WORK_GROUP_SIZE
];
__shared__
AtomData2
localData
[
FORCE_WORK_GROUP_SIZE
];
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
59bd8d19
...
@@ -100,7 +100,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
...
@@ -100,7 +100,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
*
*
*/
*/
extern
"C"
__global__
void
computeNonbonded
(
extern
"C"
__global__
void
computeNonbonded
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
...
@@ -583,6 +583,6 @@ extern "C" __global__ void computeNonbonded(
...
@@ -583,6 +583,6 @@ extern "C" __global__ void computeNonbonded(
pos
++
;
pos
++
;
}
}
#ifdef INCLUDE_ENERGY
#ifdef INCLUDE_ENERGY
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
(
real
)
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
#endif
#endif
}
}
\ No newline at end of file
platforms/cuda/src/kernels/pme.cu
View file @
59bd8d19
...
@@ -115,7 +115,7 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
...
@@ -115,7 +115,7 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
extern
"C"
__global__
void
extern
"C"
__global__
void
reciprocalConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
reciprocalConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
mixed
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
...
@@ -150,14 +150,14 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
...
@@ -150,14 +150,14 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
extern
"C"
__global__
void
extern
"C"
__global__
void
gridEvaluateEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
gridEvaluateEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
mixed
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
real
energy
=
0
;
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// real indices
// real indices
int
kx
=
index
/
(
GRID_SIZE_Y
*
(
GRID_SIZE_Z
));
int
kx
=
index
/
(
GRID_SIZE_Y
*
(
GRID_SIZE_Z
));
...
...
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
59bd8d19
...
@@ -181,7 +181,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
...
@@ -181,7 +181,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
s
<<
prefixCode
[
i
];
string
bufferType
=
(
context
.
getSupports64BitGlobalAtomics
()
?
"long"
:
"real4"
);
string
bufferType
=
(
context
.
getSupports64BitGlobalAtomics
()
?
"long"
:
"real4"
);
s
<<
"__kernel void computeBondedForces(__global "
<<
bufferType
<<
"* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const real4* restrict posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
s
<<
"__kernel void computeBondedForces(__global "
<<
bufferType
<<
"* restrict forceBuffers, __global
mixed
* restrict energyBuffer, __global const real4* restrict posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
int
force
=
set
[
i
];
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
context
.
intToString
(
indexWidth
[
force
]));
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
context
.
intToString
(
indexWidth
[
force
]));
...
@@ -191,7 +191,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
...
@@ -191,7 +191,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
") {
\n
"
;
s
<<
") {
\n
"
;
s
<<
"
real
energy = 0
.0f
;
\n
"
;
s
<<
"
mixed
energy = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
int
force
=
set
[
i
];
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
59bd8d19
...
@@ -460,7 +460,7 @@ void OpenCLContext::initialize() {
...
@@ -460,7 +460,7 @@ void OpenCLContext::initialize() {
else
{
else
{
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_
float
>
(
*
this
,
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
()),
"energyBuffer"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_
double
>
(
*
this
,
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
()),
"energyBuffer"
);
}
}
if
(
supports64BitGlobalAtomics
)
{
if
(
supports64BitGlobalAtomics
)
{
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
59bd8d19
...
@@ -136,7 +136,7 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
...
@@ -136,7 +136,7 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
cl
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
cl
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
OpenCLArray
&
energyArray
=
cl
.
getEnergyBuffer
();
OpenCLArray
&
energyArray
=
cl
.
getEnergyBuffer
();
if
(
cl
.
getUseDoublePrecision
())
{
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
()
)
{
double
*
energy
=
(
double
*
)
cl
.
getPinnedBuffer
();
double
*
energy
=
(
double
*
)
cl
.
getPinnedBuffer
();
energyArray
.
download
(
energy
);
energyArray
.
download
(
energy
);
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
...
...
platforms/opencl/src/kernels/customCentroidBond.cl
View file @
59bd8d19
...
@@ -105,10 +105,10 @@ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -105,10 +105,10 @@ real4 computeCross(real4 vec1, real4 vec2) {
/**
/**
*
Compute
the
forces
on
groups
based
on
the
bonds.
*
Compute
the
forces
on
groups
based
on
the
bonds.
*/
*/
__kernel
void
computeGroupForces
(
__global
long*
restrict
groupForce,
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
centerPositions,
__kernel
void
computeGroupForces
(
__global
long*
restrict
groupForce,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
centerPositions,
__global
const
int*
restrict
bondGroups
__global
const
int*
restrict
bondGroups
EXTRA_ARGS
)
{
EXTRA_ARGS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
COMPUTE_FORCE
COMPUTE_FORCE
}
}
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
59bd8d19
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
mixed
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
exclusionTiles,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -31,7 +31,7 @@ __kernel void computeN2Energy(
...
@@ -31,7 +31,7 @@ __kernel void computeN2Energy(
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
59bd8d19
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
mixed
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
exclusionTiles,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -27,7 +27,7 @@ __kernel void computeN2Energy(
...
@@ -27,7 +27,7 @@ __kernel void computeN2Energy(
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
View file @
59bd8d19
...
@@ -9,9 +9,9 @@
...
@@ -9,9 +9,9 @@
*
Reduce
the
derivatives
computed
in
the
N^2
energy
kernel,
and
compute
all
per-particle
energy
terms.
*
Reduce
the
derivatives
computed
in
the
N^2
energy
kernel,
and
compute
all
per-particle
energy
terms.
*/
*/
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
real4*
restrict
forceBuffers,
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
real4*
restrict
forceBuffers,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
unsigned
int
index
=
get_global_id
(
0
)
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
//
Reduce
the
derivatives
//
Reduce
the
derivatives
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment