Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a381a3ab
"serialization/tests/TestSerializeCMAPTorsion.cpp" did not exist on "d350fb551144c129d5d754cb496c63c927d9f3e5"
Commit
a381a3ab
authored
Aug 01, 2016
by
peastman
Browse files
Merge branch 'master' into gayberne
parents
5ecc8e00
1f7866ad
Changes
199
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
808 additions
and
264 deletions
+808
-264
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+47
-35
platforms/cuda/src/kernels/customGBValuePerParticle.cu
platforms/cuda/src/kernels/customGBValuePerParticle.cu
+1
-0
platforms/cuda/src/kernels/customIntegratorPerDof.cu
platforms/cuda/src/kernels/customIntegratorPerDof.cu
+2
-1
platforms/cuda/src/kernels/customNonbonded.cu
platforms/cuda/src/kernels/customNonbonded.cu
+8
-5
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+47
-51
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+35
-28
platforms/opencl/include/OpenCLBondedUtilities.h
platforms/opencl/include/OpenCLBondedUtilities.h
+11
-1
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+44
-2
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+21
-3
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+14
-1
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+27
-2
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+25
-8
platforms/opencl/src/OpenCLExpressionUtilities.cpp
platforms/opencl/src/OpenCLExpressionUtilities.cpp
+5
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+400
-49
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+51
-19
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+4
-0
platforms/opencl/src/kernels/customCentroidBond.cl
platforms/opencl/src/kernels/customCentroidBond.cl
+2
-0
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+34
-30
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+28
-24
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
+2
-0
No files found.
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
a381a3ab
...
...
@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
COMPUTE_VALUE
}
value
+=
tempValue1
;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
...
...
@@ -146,6 +151,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
...
...
@@ -167,39 +174,35 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
else
#endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
#else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
@@ -246,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -278,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -287,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
atomicAdd
(
&
global_value
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
unsigned
int
offset1
=
atom1
;
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
#ifdef USE_CUTOFF
unsigned
int
atom2
=
atomIndices
[
threadIdx
.
x
];
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
atomicAdd
(
&
global_value
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
unsigned
int
offset2
=
atom2
;
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
pos
++
;
}
...
...
platforms/cuda/src/kernels/customGBValuePerParticle.cu
View file @
a381a3ab
...
...
@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
// Load the pairwise value
real
sum
=
valueBuffers
[
index
]
/
(
real
)
0x100000000
;
REDUCE_PARAM0_DERIV
// Now calculate other values
...
...
platforms/cuda/src/kernels/customIntegratorPerDof.cu
View file @
a381a3ab
...
...
@@ -33,7 +33,8 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
gaussianBaseIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
energy
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
gaussianBaseIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
energy
,
mixed
*
__restrict__
energyParamDerivs
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
platforms/cuda/src/kernels/customNonbonded.cu
View file @
a381a3ab
...
...
@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) {
if
(
!
isExcluded
)
{
#endif
real
tempForce
=
0
;
COMPUTE_FORCE
real
switchValue
=
1
,
switchDeriv
=
0
;
#if USE_SWITCH
if
(
r
>
SWITCH_CUTOFF
)
{
real
x
=
r
-
SWITCH_CUTOFF
;
real
switchValue
=
1
+
x
*
x
*
x
*
(
SWITCH_C3
+
x
*
(
SWITCH_C4
+
x
*
SWITCH_C5
));
real
switchDeriv
=
x
*
x
*
(
3
*
SWITCH_C3
+
x
*
(
4
*
SWITCH_C4
+
x
*
5
*
SWITCH_C5
));
tempForce
=
tempForce
*
switchValue
-
tempEnergy
*
switchDeriv
;
tempEnergy
*=
switchValue
;
switchValue
=
1
+
x
*
x
*
x
*
(
SWITCH_C3
+
x
*
(
SWITCH_C4
+
x
*
SWITCH_C5
));
switchDeriv
=
x
*
x
*
(
3
*
SWITCH_C3
+
x
*
(
4
*
SWITCH_C4
+
x
*
5
*
SWITCH_C5
));
}
#endif
COMPUTE_FORCE
#if USE_SWITCH
tempForce
=
tempForce
*
switchValue
-
tempEnergy
*
switchDeriv
;
tempEnergy
*=
switchValue
;
#endif
dEdR
+=
tempForce
*
invR
;
}
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
a381a3ab
...
...
@@ -204,6 +204,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
...
...
@@ -225,39 +227,35 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
else
#endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
)
);
#else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+
=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
@@ -559,6 +557,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
...
...
@@ -580,39 +580,35 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
else
#endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
#else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
a381a3ab
...
...
@@ -113,11 +113,14 @@ extern "C" __global__ void computeNonbonded(
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
mixed
energy
=
0
;
INIT_DERIVATIVES
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
#endif
// First loop: process tiles that contain exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE
+
warp
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE
+
(
warp
+
1
)
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
for
(
int
pos
=
firstExclusionTile
;
pos
<
lastExclusionTile
;
pos
++
)
{
...
...
@@ -173,6 +176,7 @@ extern "C" __global__ void computeNonbonded(
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
||
!
(
excl
&
0x1
));
#endif
real
tempEnergy
=
0.0
f
;
const
real
interactionScale
=
0.5
f
;
COMPUTE_INTERACTION
energy
+=
0.5
f
*
tempEnergy
;
#ifdef INCLUDE_FORCES
...
...
@@ -241,6 +245,7 @@ extern "C" __global__ void computeNonbonded(
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
||
!
(
excl
&
0x1
));
#endif
real
tempEnergy
=
0.0
f
;
const
real
interactionScale
=
1.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
...
...
@@ -309,8 +314,11 @@ extern "C" __global__ void computeNonbonded(
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
#else
...
...
@@ -335,39 +343,35 @@ extern "C" __global__ void computeNonbonded(
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_CUTOFF
);
}
else
#endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_CUTOFF
);
#else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
// Load atom data for this tile.
...
...
@@ -447,6 +451,7 @@ extern "C" __global__ void computeNonbonded(
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
#endif
real
tempEnergy
=
0.0
f
;
const
real
interactionScale
=
1.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
...
...
@@ -517,6 +522,7 @@ extern "C" __global__ void computeNonbonded(
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
#endif
real
tempEnergy
=
0.0
f
;
const
real
interactionScale
=
1.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
...
...
@@ -585,4 +591,5 @@ extern "C" __global__ void computeNonbonded(
#ifdef INCLUDE_ENERGY
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
#endif
SAVE_DERIVATIVES
}
\ No newline at end of file
platforms/opencl/include/OpenCLBondedUtilities.h
View file @
a381a3ab
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -100,6 +100,15 @@ public:
* refer to it by this name.
*/
std
::
string
addArgument
(
cl
::
Memory
&
data
,
const
std
::
string
&
type
);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std
::
string
addEnergyParameterDerivative
(
const
std
::
string
&
param
);
/**
* Add some OpenCL code that should be included in the program, before the start of the kernel.
* This can be used, for example, to define functions that will be called by the kernel.
...
...
@@ -137,6 +146,7 @@ private:
std
::
vector
<
OpenCLArray
*>
atomIndices
;
std
::
vector
<
OpenCLArray
*>
bufferIndices
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
int
numForceBuffers
,
maxBonds
,
allGroups
;
bool
hasInitializedKernels
;
};
...
...
platforms/opencl/include/OpenCLContext.h
View file @
a381a3ab
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -264,6 +264,12 @@ public:
OpenCLArray
&
getEnergyBuffer
()
{
return
*
energyBuffer
;
}
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
OpenCLArray
&
getEnergyParamDerivBuffer
()
{
return
*
energyParamDerivBuffer
;
}
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
...
...
@@ -408,6 +414,18 @@ public:
void
setStepsSinceReorder
(
int
steps
)
{
stepsSinceReorder
=
steps
;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
bool
getForcesValid
()
const
{
return
forcesValid
;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
void
setForcesValid
(
bool
valid
)
{
forcesValid
=
valid
;
}
/**
* Get the number of atoms.
*/
...
...
@@ -647,6 +665,27 @@ public:
std
::
vector
<
ForcePostComputation
*>&
getPostComputations
()
{
return
postComputations
;
}
/**
* Get the names of all parameters with respect to which energy derivatives are computed.
*/
const
std
::
vector
<
std
::
string
>&
getEnergyParamDerivNames
()
const
{
return
energyParamDerivNames
;
}
/**
* Get a workspace data structure used for accumulating the values of derivatives of the energy
* with respect to parameters.
*/
std
::
map
<
std
::
string
,
double
>&
getEnergyParamDerivWorkspace
()
{
return
energyParamDerivWorkspace
;
}
/**
* Register that the derivative of potential energy with respect to a context parameter
* will need to be calculated. If this is called multiple times for a single parameter,
* it is only added to the list once.
*
* @param param the name of the parameter to add
*/
void
addEnergyParameterDerivative
(
const
std
::
string
&
param
);
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
...
...
@@ -684,7 +723,7 @@ private:
int
numThreadBlocks
;
int
numForceBuffers
;
int
simdWidth
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
,
forcesValid
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
,
periodicBoxVecXDouble
,
periodicBoxVecYDouble
,
periodicBoxVecZDouble
;
std
::
string
defaultOptimizationOptions
;
...
...
@@ -713,7 +752,10 @@ private:
OpenCLArray
*
forceBuffers
;
OpenCLArray
*
longForceBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
*
atomIndexDevice
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
vector
<
int
>
atomIndex
;
std
::
vector
<
cl
::
Memory
*>
autoclearBuffers
;
std
::
vector
<
int
>
autoclearBufferSizes
;
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
a381a3ab
...
...
@@ -141,6 +141,12 @@ public:
* @param forces on exit, this contains the forces
*/
void
getForces
(
ContextImpl
&
context
,
std
::
vector
<
Vec3
>&
forces
);
/**
* Get the current derivatives of the energy with respect to context parameters.
*
* @param derivs on exit, this contains the derivatives
*/
void
getEnergyParameterDerivatives
(
ContextImpl
&
context
,
std
::
map
<
std
::
string
,
double
>&
derivs
);
/**
* Get the current periodic box vectors.
*
...
...
@@ -709,6 +715,7 @@ private:
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*>
tabulatedFunctions
;
double
longRangeCoefficient
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
;
int
numGroupThreadBlocks
;
CustomNonbondedForce
*
forceCopy
;
...
...
@@ -801,13 +808,15 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
private:
double
cutoff
;
bool
hasInitializedKernels
,
needParameterGradient
;
bool
hasInitializedKernels
,
needParameterGradient
,
needEnergyParamDerivs
;
int
maxTiles
,
numComputedValues
;
OpenCLContext
&
cl
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
computedValues
;
OpenCLParameterSet
*
energyDerivs
;
OpenCLParameterSet
*
energyDerivChain
;
std
::
vector
<
OpenCLParameterSet
*>
dValuedParam
;
std
::
vector
<
OpenCLArray
*>
dValue0dParam
;
OpenCLArray
*
longEnergyDerivs
;
OpenCLArray
*
globals
;
OpenCLArray
*
valueBuffers
;
...
...
@@ -953,6 +962,7 @@ public:
private:
int
numGroups
,
numBonds
;
bool
needEnergyParamDerivs
;
OpenCLContext
&
cl
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
...
...
@@ -1339,7 +1349,7 @@ public:
enum
GlobalTargetType
{
DT
,
VARIABLE
,
PARAMETER
};
OpenCLIntegrateCustomStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateCustomStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
),
localValuesAreCurrent
(
false
),
globalValues
(
NULL
),
sumBuffer
(
NULL
),
summedValue
(
NULL
),
uniformRandoms
(
NULL
),
randomSeed
(
NULL
),
perDof
Values
(
NULL
)
{
randomSeed
(
NULL
),
perDof
EnergyParamDerivs
(
NULL
),
perDofValues
(
NULL
),
needsEnergyParamDerivs
(
false
)
{
}
~
OpenCLIntegrateCustomStepKernel
();
/**
...
...
@@ -1404,8 +1414,11 @@ public:
private:
class
ReorderListener
;
class
GlobalTarget
;
class
DerivFunction
;
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
Lepton
::
ExpressionTreeNode
replaceDerivFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
OpenMM
::
ContextImpl
&
context
);
void
findExpressionsForDerivs
(
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variableNodes
);
void
recordGlobalValue
(
double
value
,
GlobalTarget
target
);
void
recordChangedParameters
(
ContextImpl
&
context
);
bool
evaluateCondition
(
int
step
);
...
...
@@ -1413,18 +1426,23 @@ private:
double
energy
;
float
energyFloat
;
int
numGlobalVariables
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
mutable
bool
localValuesAreCurrent
;
OpenCLArray
*
globalValues
;
OpenCLArray
*
sumBuffer
;
OpenCLArray
*
summedValue
;
OpenCLArray
*
uniformRandoms
;
OpenCLArray
*
randomSeed
;
OpenCLArray
*
perDofEnergyParamDerivs
;
std
::
map
<
int
,
OpenCLArray
*>
savedForces
;
std
::
set
<
int
>
validSavedForces
;
OpenCLParameterSet
*
perDofValues
;
mutable
std
::
vector
<
std
::
vector
<
cl_float
>
>
localPerDofValuesFloat
;
mutable
std
::
vector
<
std
::
vector
<
cl_double
>
>
localPerDofValuesDouble
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivs
;
std
::
vector
<
std
::
string
>
perDofEnergyParamDerivNames
;
std
::
vector
<
cl_float
>
localPerDofEnergyParamDerivsFloat
;
std
::
vector
<
cl_double
>
localPerDofEnergyParamDerivsDouble
;
std
::
vector
<
float
>
globalValuesFloat
;
std
::
vector
<
double
>
globalValuesDouble
;
std
::
vector
<
double
>
initialGlobalVariables
;
...
...
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
a381a3ab
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -88,6 +88,15 @@ public:
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*/
void
addArgument
(
const
ParameterInfo
&
parameter
);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std
::
string
addEnergyParameterDerivative
(
const
std
::
string
&
param
);
/**
* Specify the list of exclusions that an interaction outside the default kernel will depend on.
*
...
...
@@ -281,9 +290,13 @@ private:
OpenCLArray
*
oldPositions
;
OpenCLArray
*
rebuildNeighborList
;
OpenCLSort
*
blockSorter
;
cl
::
Event
downloadCountEvent
;
cl
::
Buffer
*
pinnedCountBuffer
;
int
*
pinnedCountMemory
;
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
arguments
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
std
::
map
<
int
,
double
>
groupCutoff
;
std
::
map
<
int
,
std
::
string
>
groupKernelSource
;
double
lastCutoff
;
...
...
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
a381a3ab
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -56,12 +56,25 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
}
}
std
::
string
OpenCLBondedUtilities
::
addArgument
(
cl
::
Memory
&
data
,
const
string
&
type
)
{
string
OpenCLBondedUtilities
::
addArgument
(
cl
::
Memory
&
data
,
const
string
&
type
)
{
arguments
.
push_back
(
&
data
);
argTypes
.
push_back
(
type
);
return
"customArg"
+
context
.
intToString
(
arguments
.
size
());
}
string
OpenCLBondedUtilities
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
// See if the parameter has already been added.
int
index
;
for
(
index
=
0
;
index
<
energyParameterDerivatives
.
size
();
index
++
)
if
(
param
==
energyParameterDerivatives
[
index
])
break
;
if
(
index
==
energyParameterDerivatives
.
size
())
energyParameterDerivatives
.
push_back
(
param
);
context
.
addEnergyParameterDerivative
(
param
);
return
string
(
"energyParamDeriv"
)
+
context
.
intToString
(
index
);
}
void
OpenCLBondedUtilities
::
addPrefixCode
(
const
string
&
source
)
{
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
if
(
prefixCode
[
i
]
==
source
)
...
...
@@ -190,13 +203,23 @@ void OpenCLBondedUtilities::initialize(const System& system) {
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
if
(
energyParameterDerivatives
.
size
()
>
0
)
s
<<
", __global mixed* restrict energyParamDerivs"
;
s
<<
") {
\n
"
;
s
<<
"mixed energy = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
s
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
}
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
const
vector
<
string
>&
allParamDerivNames
=
context
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
for
(
int
index
=
0
;
index
<
numDerivs
;
index
++
)
if
(
allParamDerivNames
[
index
]
==
energyParameterDerivatives
[
i
])
s
<<
"energyParamDerivs[get_global_id(0)*"
<<
numDerivs
<<
"+"
<<
index
<<
"] += energyParamDeriv"
<<
i
<<
";
\n
"
;
s
<<
"}
\n
"
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
...
...
@@ -274,6 +297,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
}
for
(
int
j
=
0
;
j
<
(
int
)
arguments
.
size
();
j
++
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
*
arguments
[
j
]);
if
(
energyParameterDerivatives
.
size
()
>
0
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
context
.
getEnergyParamDerivBuffer
().
getDeviceBuffer
());
}
}
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
{
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
a381a3ab
...
...
@@ -69,7 +69,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
...
...
@@ -179,10 +179,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
ThreadBlockSize
);
if
(
platformVendor
.
size
()
>=
5
&&
platformVendor
.
substr
(
0
,
5
)
==
"Intel"
)
defaultOptimizationOptions
=
""
;
else
if
(
platformVendor
==
"Apple"
)
defaultOptimizationOptions
=
"-cl-mad-enable -cl-no-signed-zeros"
;
else
defaultOptimizationOptions
=
"-cl-
fast-relaxed-math
"
;
defaultOptimizationOptions
=
"-cl-
mad-enable -cl-no-signed-zeros
"
;
supports64BitGlobalAtomics
=
(
device
.
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_khr_int64_base_atomics"
)
!=
string
::
npos
);
supportsDoublePrecision
=
(
device
.
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_khr_fp64"
)
!=
string
::
npos
);
if
((
useDoublePrecision
||
useMixedPrecision
)
&&
!
supportsDoublePrecision
)
...
...
@@ -437,6 +435,8 @@ OpenCLContext::~OpenCLContext() {
delete
longForceBuffer
;
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
delete
atomIndexDevice
;
if
(
integration
!=
NULL
)
...
...
@@ -457,15 +457,16 @@ void OpenCLContext::initialize() {
numForceBuffers
=
std
::
max
(
numForceBuffers
,
bonded
->
getNumForceBuffers
());
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
();
i
++
)
numForceBuffers
=
std
::
max
(
numForceBuffers
,
forces
[
i
]
->
getRequiredForceBuffers
());
int
energyBufferSize
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
forceBuffers
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumE
nergyBuffer
s
())
,
"energyBuffer"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
e
nergyBuffer
Size
,
"energyBuffer"
);
}
else
{
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumE
nergyBuffer
s
())
,
"energyBuffer"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
e
nergyBuffer
Size
,
"energyBuffer"
);
}
if
(
supports64BitGlobalAtomics
)
{
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
...
...
@@ -477,7 +478,15 @@ void OpenCLContext::initialize() {
}
addAutoclearBuffer
(
*
forceBuffers
);
addAutoclearBuffer
(
*
energyBuffer
);
int
bufferBytes
=
max
(
velm
->
getSize
()
*
velm
->
getElementSize
(),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
());
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
if
(
numEnergyParamDerivs
>
0
)
{
if
(
useDoublePrecision
||
useMixedPrecision
)
energyParamDerivBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
else
energyParamDerivBuffer
=
OpenCLArray
::
create
<
cl_float
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
addAutoclearBuffer
(
*
energyParamDerivBuffer
);
}
int
bufferBytes
=
max
(
velm
->
getSize
()
*
velm
->
getElementSize
(),
energyBufferSize
*
energyBuffer
->
getElementSize
());
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedMemory
=
currentQueue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
...
...
@@ -1052,7 +1061,6 @@ void OpenCLContext::reorderAtoms() {
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_double
,
mm_double4
>
();
else
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_float
,
mm_float4
>
();
nonbonded
->
updateNeighborListSize
();
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
...
...
@@ -1232,6 +1240,15 @@ void OpenCLContext::addPostComputation(ForcePostComputation* computation) {
postComputations
.
push_back
(
computation
);
}
void
OpenCLContext
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
// See if this parameter has already been registered.
for
(
int
i
=
0
;
i
<
energyParamDerivNames
.
size
();
i
++
)
if
(
param
==
energyParamDerivNames
[
i
])
return
;
energyParamDerivNames
.
push_back
(
param
);
}
struct
OpenCLContext
::
WorkThread
::
ThreadData
{
ThreadData
(
std
::
queue
<
OpenCLContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
...
...
platforms/opencl/src/OpenCLExpressionUtilities.cpp
View file @
a381a3ab
...
...
@@ -174,8 +174,8 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"if (x >= "
<<
paramsFloat
[
2
]
<<
" && x <= "
<<
paramsFloat
[
3
]
<<
" && y >= "
<<
paramsFloat
[
4
]
<<
" && y <= "
<<
paramsFloat
[
5
]
<<
") {
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
2
]
<<
")*"
<<
paramsFloat
[
6
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
4
]
<<
")*"
<<
paramsFloat
[
7
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 4*(s+"
<<
paramsInt
[
0
]
<<
"*t);
\n
"
;
out
<<
"float4 c[4];
\n
"
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
...
@@ -217,9 +217,9 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"x = (x - "
<<
paramsFloat
[
3
]
<<
")*"
<<
paramsFloat
[
9
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
5
]
<<
")*"
<<
paramsFloat
[
10
]
<<
";
\n
"
;
out
<<
"z = (z - "
<<
paramsFloat
[
7
]
<<
")*"
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 16*(s+"
<<
paramsInt
[
0
]
<<
"*(t+"
<<
paramsInt
[
1
]
<<
"*u));
\n
"
;
out
<<
"float4 c[16];
\n
"
;
for
(
int
j
=
0
;
j
<
16
;
j
++
)
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
a381a3ab
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
a381a3ab
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -57,7 +57,7 @@ private:
OpenCLNonbondedUtilities
::
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
numForceBuffers
(
0
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
pinnedCountMemory
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
// Decide how many thread blocks and force buffers to use.
deviceIsCpu
=
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
...
...
@@ -90,6 +90,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
numForceBuffers
=
numForceThreadBlocks
*
forceThreadBlockSize
/
OpenCLContext
::
TileSize
;
}
}
pinnedCountBuffer
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
sizeof
(
int
));
pinnedCountMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
*
pinnedCountBuffer
,
CL_TRUE
,
CL_MAP_READ
,
0
,
sizeof
(
int
));
}
OpenCLNonbondedUtilities
::~
OpenCLNonbondedUtilities
()
{
...
...
@@ -123,6 +125,8 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
delete
pinnedCountBuffer
;
}
void
OpenCLNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
...
...
@@ -158,6 +162,19 @@ void OpenCLNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments
.
push_back
(
parameter
);
}
string
OpenCLNonbondedUtilities
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
// See if the parameter has already been added.
int
index
;
for
(
index
=
0
;
index
<
energyParameterDerivatives
.
size
();
index
++
)
if
(
param
==
energyParameterDerivatives
[
index
])
break
;
if
(
index
==
energyParameterDerivatives
.
size
())
energyParameterDerivatives
.
push_back
(
param
);
context
.
addEnergyParameterDerivative
(
param
);
return
string
(
"energyParamDeriv"
)
+
context
.
intToString
(
index
);
}
void
OpenCLNonbondedUtilities
::
requestExclusions
(
const
vector
<
vector
<
int
>
>&
exclusionList
)
{
if
(
anyExclusions
)
{
bool
sameExclusions
=
(
exclusionList
.
size
()
==
atomExclusions
.
size
());
...
...
@@ -357,20 +374,16 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
bool
rebuild
=
false
;
do
{
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
forceRebuildNeighborList
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
forceRebuildNeighborList
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
lastCutoff
=
kernels
.
cutoffDistance
;
context
.
getQueue
().
enqueueReadBuffer
(
interactionCount
->
getDeviceBuffer
(),
CL_FALSE
,
0
,
sizeof
(
int
),
pinnedCountMemory
,
NULL
,
&
downloadCountEvent
);
}
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
@@ -385,20 +398,22 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
if
(
useCutoff
&&
numTiles
>
0
)
{
downloadCountEvent
.
wait
();
updateNeighborListSize
();
}
}
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
if
(
pinnedCountMemory
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
int
maxTiles
=
(
int
)
(
1.2
*
pinned
InteractionCount
[
0
]);
int
maxTiles
=
(
int
)
(
1.2
*
pinned
CountMemory
[
0
]);
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
...
...
@@ -430,6 +445,7 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
return
true
;
}
...
...
@@ -588,6 +604,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
args
<<
arguments
[
i
].
getName
();
}
}
if
(
energyParameterDerivatives
.
size
()
>
0
)
args
<<
", __global mixed* restrict energyParamDerivs"
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
stringstream
loadLocal1
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
...
...
@@ -638,6 +656,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
}
}
replacements
[
"LOAD_ATOM2_PARAMETERS"
]
=
load2j
.
str
();
stringstream
initDerivs
;
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
initDerivs
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
replacements
[
"INIT_DERIVATIVES"
]
=
initDerivs
.
str
();
stringstream
saveDerivs
;
const
vector
<
string
>&
allParamDerivNames
=
context
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
for
(
int
index
=
0
;
index
<
numDerivs
;
index
++
)
if
(
allParamDerivNames
[
index
]
==
energyParameterDerivatives
[
i
])
saveDerivs
<<
"energyParamDerivs[get_global_id(0)*"
<<
numDerivs
<<
"+"
<<
index
<<
"] += energyParamDeriv"
<<
i
<<
";
\n
"
;
replacements
[
"SAVE_DERIVATIVES"
]
=
saveDerivs
.
str
();
map
<
string
,
string
>
defines
;
if
(
useCutoff
)
defines
[
"USE_CUTOFF"
]
=
"1"
;
...
...
@@ -713,5 +743,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
{
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
arguments
[
i
].
getMemory
());
}
if
(
energyParameterDerivatives
.
size
()
>
0
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
context
.
getEnergyParamDerivBuffer
().
getDeviceBuffer
());
return
kernel
;
}
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
a381a3ab
...
...
@@ -30,6 +30,10 @@
tempForce
=
-prefactor*
(
erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
-prefactor*erfAlphaR
;
}
else
{
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w
;
}
}
else
{
#
if
HAS_LENNARD_JONES
...
...
platforms/opencl/src/kernels/customCentroidBond.cl
View file @
a381a3ab
...
...
@@ -116,10 +116,12 @@ __kernel void computeGroupForces(__global long* restrict groupForce, __global mi
__global
const
int*
restrict
bondGroups,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
EXTRA_ARGS
)
{
mixed
energy
=
0
;
INIT_PARAM_DERIVS
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
COMPUTE_FORCE
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
SAVE_PARAM_DERIVS
}
/**
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
a381a3ab
...
...
@@ -32,6 +32,7 @@ __kernel void computeN2Energy(
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
mixed
energy
=
0
;
INIT_PARAM_DERIVS
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
@@ -73,6 +74,7 @@ __kernel void computeN2Energy(
atom2
=
y*TILE_SIZE+j
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
0.5f
;
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
...
...
@@ -123,6 +125,7 @@ __kernel void computeN2Energy(
atom2 = y*TILE_SIZE+tj;
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1.0f;
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
...
...
@@ -181,6 +184,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
...
...
@@ -204,42 +209,38 @@ __kernel void computeN2Energy(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
SYNC_WARPS
;
while
(
skipTiles[tbx+TILE_SIZE-1]
<
pos
)
{
SYNC_WARPS
;
while
(
skipTiles[tbx+TILE_SIZE-1]
<
pos
)
{
SYNC_WARPS
;
if
(
skipBase+tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[skipBase+tgx]
;
skipTiles[get_local_id
(
0
)
]
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
else
skipTiles[get_local_id
(
0
)
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
SYNC_WARPS
;
if
(
skipBase+tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[skipBase+tgx]
;
skipTiles[get_local_id
(
0
)
]
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
else
skipTiles[get_local_id
(
0
)
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
SYNC_WARPS
;
}
while
(
skipTiles[currentSkipIndex]
<
pos
)
currentSkipIndex++
;
includeTile
=
(
skipTiles[currentSkipIndex]
!=
pos
)
;
#
endif
if
(
includeTile
)
{
unsigned
int
atom1
=
x*TILE_SIZE
+
tgx
;
...
...
@@ -283,6 +284,7 @@ __kernel void computeN2Energy(
atom2
=
atomIndices[tbx+tj]
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1.0f
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_INTERACTION
dEdR
/=
-r
;
...
...
@@ -321,6 +323,7 @@ __kernel void computeN2Energy(
atom2
=
atomIndices[tbx+tj]
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1.0f
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_INTERACTION
dEdR
/=
-r
;
...
...
@@ -375,4 +378,5 @@ __kernel void computeN2Energy(
pos++
;
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
SAVE_PARAM_DERIVS
}
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
a381a3ab
...
...
@@ -28,6 +28,7 @@ __kernel void computeN2Energy(
#
endif
PARAMETER_ARGUMENTS
)
{
mixed
energy
=
0
;
INIT_PARAM_DERIVS
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
@@ -74,6 +75,7 @@ __kernel void computeN2Energy(
atom2
=
y*TILE_SIZE+j
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
0.5f
;
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
...
...
@@ -140,6 +142,7 @@ __kernel void computeN2Energy(
atom2
=
y*TILE_SIZE+j
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1.0f
;
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
|
| atom2 >= NUM_ATOMS || !(excl & 0x1));
if (!isExcluded) {
...
...
@@ -201,6 +204,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else
...
...
@@ -220,35 +225,31 @@ __kernel void computeN2Energy(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
y
+=
(
x
<
y
?
-1
:
1
)
;
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
}
}
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
//
Skip
over
tiles
that
have
exclusions,
since
they
were
already
processed.
while
(
nextToSkip
<
pos
)
{
if
(
currentSkipIndex
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[currentSkipIndex++]
;
nextToSkip
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
else
nextToSkip
=
end
;
while
(
nextToSkip
<
pos
)
{
if
(
currentSkipIndex
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles[currentSkipIndex++]
;
nextToSkip
=
tile.x
+
tile.y*NUM_BLOCKS
-
tile.y*
(
tile.y+1
)
/2
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
else
nextToSkip
=
end
;
}
includeTile
=
(
nextToSkip
!=
pos
)
;
#
endif
if
(
includeTile
)
{
//
Load
the
data
for
this
tile.
...
...
@@ -293,6 +294,7 @@ __kernel void computeN2Energy(
atom2
=
atomIndices[j]
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1.0f
;
COMPUTE_INTERACTION
dEdR
/=
-r
;
energy
+=
tempEnergy
;
...
...
@@ -349,6 +351,7 @@ __kernel void computeN2Energy(
atom2
=
atomIndices[j]
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1.0f
;
COMPUTE_INTERACTION
dEdR
/=
-r
;
energy
+=
tempEnergy
;
...
...
@@ -402,4 +405,5 @@ __kernel void computeN2Energy(
pos++
;
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
SAVE_PARAM_DERIVS
}
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
View file @
a381a3ab
...
...
@@ -12,6 +12,7 @@
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
real4*
restrict
forceBuffers,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
posq
PARAMETER_ARGUMENTS
)
{
mixed
energy
=
0
;
INIT_PARAM_DERIVS
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
//
Reduce
the
derivatives
...
...
@@ -27,4 +28,5 @@ __kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global
index
+=
get_global_size
(
0
)
;
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
SAVE_PARAM_DERIVS
}
Prev
1
2
3
4
5
6
7
8
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment