Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a381a3ab
Commit
a381a3ab
authored
Aug 01, 2016
by
peastman
Browse files
Merge branch 'master' into gayberne
parents
5ecc8e00
1f7866ad
Changes
199
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
808 additions
and
264 deletions
+808
-264
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+47
-35
platforms/cuda/src/kernels/customGBValuePerParticle.cu
platforms/cuda/src/kernels/customGBValuePerParticle.cu
+1
-0
platforms/cuda/src/kernels/customIntegratorPerDof.cu
platforms/cuda/src/kernels/customIntegratorPerDof.cu
+2
-1
platforms/cuda/src/kernels/customNonbonded.cu
platforms/cuda/src/kernels/customNonbonded.cu
+8
-5
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+47
-51
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+35
-28
platforms/opencl/include/OpenCLBondedUtilities.h
platforms/opencl/include/OpenCLBondedUtilities.h
+11
-1
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+44
-2
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+21
-3
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+14
-1
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+27
-2
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+25
-8
platforms/opencl/src/OpenCLExpressionUtilities.cpp
platforms/opencl/src/OpenCLExpressionUtilities.cpp
+5
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+400
-49
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+51
-19
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+4
-0
platforms/opencl/src/kernels/customCentroidBond.cl
platforms/opencl/src/kernels/customCentroidBond.cl
+2
-0
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+34
-30
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+28
-24
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
+2
-0
No files found.
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
a381a3ab
...
@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
COMPUTE_VALUE
COMPUTE_VALUE
}
}
value
+=
tempValue1
;
value
+=
tempValue1
;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
}
}
#endif
#endif
...
@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
}
value
+=
tempValue1
;
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
}
}
#endif
#endif
...
@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
if
(
x
!=
y
)
{
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
}
}
...
@@ -146,6 +151,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -146,6 +151,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -167,16 +174,12 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -167,16 +174,12 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
#else
else
#endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
...
@@ -199,7 +202,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -199,7 +202,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
}
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
@@ -246,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -246,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
}
value
+=
tempValue1
;
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
}
...
@@ -278,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -278,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
}
value
+=
tempValue1
;
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
}
}
#endif
#endif
...
@@ -287,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -287,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
// Write results.
atomicAdd
(
&
global_value
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
unsigned
int
offset1
=
atom1
;
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
atom2
=
atomIndices
[
threadIdx
.
x
];
unsigned
int
atom2
=
atomIndices
[
threadIdx
.
x
];
#else
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
atomicAdd
(
&
global_value
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
unsigned
int
offset2
=
atom2
;
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
}
pos
++
;
pos
++
;
}
}
...
...
platforms/cuda/src/kernels/customGBValuePerParticle.cu
View file @
a381a3ab
...
@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
...
@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
// Load the pairwise value
// Load the pairwise value
real
sum
=
valueBuffers
[
index
]
/
(
real
)
0x100000000
;
real
sum
=
valueBuffers
[
index
]
/
(
real
)
0x100000000
;
REDUCE_PARAM0_DERIV
// Now calculate other values
// Now calculate other values
...
...
platforms/cuda/src/kernels/customIntegratorPerDof.cu
View file @
a381a3ab
...
@@ -33,7 +33,8 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
...
@@ -33,7 +33,8 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
gaussianBaseIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
energy
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
gaussianBaseIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
energy
,
mixed
*
__restrict__
energyParamDerivs
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt
[
0
].
y
;
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
platforms/cuda/src/kernels/customNonbonded.cu
View file @
a381a3ab
...
@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) {
...
@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) {
if
(
!
isExcluded
)
{
if
(
!
isExcluded
)
{
#endif
#endif
real
tempForce
=
0
;
real
tempForce
=
0
;
COMPUTE_FORCE
real
switchValue
=
1
,
switchDeriv
=
0
;
#if USE_SWITCH
#if USE_SWITCH
if
(
r
>
SWITCH_CUTOFF
)
{
if
(
r
>
SWITCH_CUTOFF
)
{
real
x
=
r
-
SWITCH_CUTOFF
;
real
x
=
r
-
SWITCH_CUTOFF
;
real
switchValue
=
1
+
x
*
x
*
x
*
(
SWITCH_C3
+
x
*
(
SWITCH_C4
+
x
*
SWITCH_C5
));
switchValue
=
1
+
x
*
x
*
x
*
(
SWITCH_C3
+
x
*
(
SWITCH_C4
+
x
*
SWITCH_C5
));
real
switchDeriv
=
x
*
x
*
(
3
*
SWITCH_C3
+
x
*
(
4
*
SWITCH_C4
+
x
*
5
*
SWITCH_C5
));
switchDeriv
=
x
*
x
*
(
3
*
SWITCH_C3
+
x
*
(
4
*
SWITCH_C4
+
x
*
5
*
SWITCH_C5
));
}
#endif
COMPUTE_FORCE
#if USE_SWITCH
tempForce
=
tempForce
*
switchValue
-
tempEnergy
*
switchDeriv
;
tempForce
=
tempForce
*
switchValue
-
tempEnergy
*
switchDeriv
;
tempEnergy
*=
switchValue
;
tempEnergy
*=
switchValue
;
}
#endif
#endif
dEdR
+=
tempForce
*
invR
;
dEdR
+=
tempForce
*
invR
;
}
}
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
a381a3ab
...
@@ -204,6 +204,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -204,6 +204,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -225,16 +227,12 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -225,16 +227,12 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
#else
else
#endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
...
@@ -257,7 +255,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -257,7 +255,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
}
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
@@ -559,6 +557,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -559,6 +557,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -580,16 +580,12 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -580,16 +580,12 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
#else
else
#endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
...
@@ -612,7 +608,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -612,7 +608,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
}
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
a381a3ab
...
@@ -113,11 +113,14 @@ extern "C" __global__ void computeNonbonded(
...
@@ -113,11 +113,14 @@ extern "C" __global__ void computeNonbonded(
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
mixed
energy
=
0
;
mixed
energy
=
0
;
INIT_DERIVATIVES
// used shared memory if the device cannot shuffle
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
#ifndef ENABLE_SHUFFLE
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
#endif
#endif
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE
+
warp
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE
+
warp
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE
+
(
warp
+
1
)
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE
+
(
warp
+
1
)
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
for
(
int
pos
=
firstExclusionTile
;
pos
<
lastExclusionTile
;
pos
++
)
{
for
(
int
pos
=
firstExclusionTile
;
pos
<
lastExclusionTile
;
pos
++
)
{
...
@@ -173,6 +176,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -173,6 +176,7 @@ extern "C" __global__ void computeNonbonded(
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
||
!
(
excl
&
0x1
));
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
||
!
(
excl
&
0x1
));
#endif
#endif
real
tempEnergy
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
const
real
interactionScale
=
0.5
f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
0.5
f
*
tempEnergy
;
energy
+=
0.5
f
*
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef INCLUDE_FORCES
...
@@ -241,6 +245,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -241,6 +245,7 @@ extern "C" __global__ void computeNonbonded(
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
||
!
(
excl
&
0x1
));
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
||
!
(
excl
&
0x1
));
#endif
#endif
real
tempEnergy
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
const
real
interactionScale
=
1.0
f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef INCLUDE_FORCES
...
@@ -309,8 +314,11 @@ extern "C" __global__ void computeNonbonded(
...
@@ -309,8 +314,11 @@ extern "C" __global__ void computeNonbonded(
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
// of them (no cutoff).
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
const
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
#else
#else
...
@@ -335,16 +343,12 @@ extern "C" __global__ void computeNonbonded(
...
@@ -335,16 +343,12 @@ extern "C" __global__ void computeNonbonded(
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_CUTOFF
);
}
#else
else
#endif
{
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
...
@@ -367,7 +371,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -367,7 +371,7 @@ extern "C" __global__ void computeNonbonded(
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
}
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
// Load atom data for this tile.
// Load atom data for this tile.
...
@@ -447,6 +451,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -447,6 +451,7 @@ extern "C" __global__ void computeNonbonded(
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
#endif
#endif
real
tempEnergy
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
const
real
interactionScale
=
1.0
f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef INCLUDE_FORCES
...
@@ -517,6 +522,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -517,6 +522,7 @@ extern "C" __global__ void computeNonbonded(
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
#endif
#endif
real
tempEnergy
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
const
real
interactionScale
=
1.0
f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef INCLUDE_FORCES
...
@@ -585,4 +591,5 @@ extern "C" __global__ void computeNonbonded(
...
@@ -585,4 +591,5 @@ extern "C" __global__ void computeNonbonded(
#ifdef INCLUDE_ENERGY
#ifdef INCLUDE_ENERGY
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
#endif
#endif
SAVE_DERIVATIVES
}
}
\ No newline at end of file
platforms/opencl/include/OpenCLBondedUtilities.h
View file @
a381a3ab
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -100,6 +100,15 @@ public:
...
@@ -100,6 +100,15 @@ public:
* refer to it by this name.
* refer to it by this name.
*/
*/
std
::
string
addArgument
(
cl
::
Memory
&
data
,
const
std
::
string
&
type
);
std
::
string
addArgument
(
cl
::
Memory
&
data
,
const
std
::
string
&
type
);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std
::
string
addEnergyParameterDerivative
(
const
std
::
string
&
param
);
/**
/**
* Add some OpenCL code that should be included in the program, before the start of the kernel.
* Add some OpenCL code that should be included in the program, before the start of the kernel.
* This can be used, for example, to define functions that will be called by the kernel.
* This can be used, for example, to define functions that will be called by the kernel.
...
@@ -137,6 +146,7 @@ private:
...
@@ -137,6 +146,7 @@ private:
std
::
vector
<
OpenCLArray
*>
atomIndices
;
std
::
vector
<
OpenCLArray
*>
atomIndices
;
std
::
vector
<
OpenCLArray
*>
bufferIndices
;
std
::
vector
<
OpenCLArray
*>
bufferIndices
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
int
numForceBuffers
,
maxBonds
,
allGroups
;
int
numForceBuffers
,
maxBonds
,
allGroups
;
bool
hasInitializedKernels
;
bool
hasInitializedKernels
;
};
};
...
...
platforms/opencl/include/OpenCLContext.h
View file @
a381a3ab
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -264,6 +264,12 @@ public:
...
@@ -264,6 +264,12 @@ public:
OpenCLArray
&
getEnergyBuffer
()
{
OpenCLArray
&
getEnergyBuffer
()
{
return
*
energyBuffer
;
return
*
energyBuffer
;
}
}
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
OpenCLArray
&
getEnergyParamDerivBuffer
()
{
return
*
energyParamDerivBuffer
;
}
/**
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
...
@@ -408,6 +414,18 @@ public:
...
@@ -408,6 +414,18 @@ public:
void
setStepsSinceReorder
(
int
steps
)
{
void
setStepsSinceReorder
(
int
steps
)
{
stepsSinceReorder
=
steps
;
stepsSinceReorder
=
steps
;
}
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
bool
getForcesValid
()
const
{
return
forcesValid
;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
void
setForcesValid
(
bool
valid
)
{
forcesValid
=
valid
;
}
/**
/**
* Get the number of atoms.
* Get the number of atoms.
*/
*/
...
@@ -647,6 +665,27 @@ public:
...
@@ -647,6 +665,27 @@ public:
std
::
vector
<
ForcePostComputation
*>&
getPostComputations
()
{
std
::
vector
<
ForcePostComputation
*>&
getPostComputations
()
{
return
postComputations
;
return
postComputations
;
}
}
/**
* Get the names of all parameters with respect to which energy derivatives are computed.
*/
const
std
::
vector
<
std
::
string
>&
getEnergyParamDerivNames
()
const
{
return
energyParamDerivNames
;
}
/**
* Get a workspace data structure used for accumulating the values of derivatives of the energy
* with respect to parameters.
*/
std
::
map
<
std
::
string
,
double
>&
getEnergyParamDerivWorkspace
()
{
return
energyParamDerivWorkspace
;
}
/**
* Register that the derivative of potential energy with respect to a context parameter
* will need to be calculated. If this is called multiple times for a single parameter,
* it is only added to the list once.
*
* @param param the name of the parameter to add
*/
void
addEnergyParameterDerivative
(
const
std
::
string
&
param
);
/**
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
* This should be called whenever force field parameters change. It will cause the definitions
...
@@ -684,7 +723,7 @@ private:
...
@@ -684,7 +723,7 @@ private:
int
numThreadBlocks
;
int
numThreadBlocks
;
int
numForceBuffers
;
int
numForceBuffers
;
int
simdWidth
;
int
simdWidth
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
,
forcesValid
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
,
periodicBoxVecXDouble
,
periodicBoxVecYDouble
,
periodicBoxVecZDouble
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
,
periodicBoxVecXDouble
,
periodicBoxVecYDouble
,
periodicBoxVecZDouble
;
std
::
string
defaultOptimizationOptions
;
std
::
string
defaultOptimizationOptions
;
...
@@ -713,7 +752,10 @@ private:
...
@@ -713,7 +752,10 @@ private:
OpenCLArray
*
forceBuffers
;
OpenCLArray
*
forceBuffers
;
OpenCLArray
*
longForceBuffer
;
OpenCLArray
*
longForceBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
*
atomIndexDevice
;
OpenCLArray
*
atomIndexDevice
;
std
::
vector
<
std
::
string
>
energyParamDerivNames
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
vector
<
int
>
atomIndex
;
std
::
vector
<
int
>
atomIndex
;
std
::
vector
<
cl
::
Memory
*>
autoclearBuffers
;
std
::
vector
<
cl
::
Memory
*>
autoclearBuffers
;
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
int
>
autoclearBufferSizes
;
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
a381a3ab
...
@@ -141,6 +141,12 @@ public:
...
@@ -141,6 +141,12 @@ public:
* @param forces on exit, this contains the forces
* @param forces on exit, this contains the forces
*/
*/
void
getForces
(
ContextImpl
&
context
,
std
::
vector
<
Vec3
>&
forces
);
void
getForces
(
ContextImpl
&
context
,
std
::
vector
<
Vec3
>&
forces
);
/**
* Get the current derivatives of the energy with respect to context parameters.
*
* @param derivs on exit, this contains the derivatives
*/
void
getEnergyParameterDerivatives
(
ContextImpl
&
context
,
std
::
map
<
std
::
string
,
double
>&
derivs
);
/**
/**
* Get the current periodic box vectors.
* Get the current periodic box vectors.
*
*
...
@@ -709,6 +715,7 @@ private:
...
@@ -709,6 +715,7 @@ private:
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
*>
tabulatedFunctions
;
double
longRangeCoefficient
;
double
longRangeCoefficient
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
;
int
numGroupThreadBlocks
;
int
numGroupThreadBlocks
;
CustomNonbondedForce
*
forceCopy
;
CustomNonbondedForce
*
forceCopy
;
...
@@ -801,13 +808,15 @@ public:
...
@@ -801,13 +808,15 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
private:
private:
double
cutoff
;
double
cutoff
;
bool
hasInitializedKernels
,
needParameterGradient
;
bool
hasInitializedKernels
,
needParameterGradient
,
needEnergyParamDerivs
;
int
maxTiles
,
numComputedValues
;
int
maxTiles
,
numComputedValues
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
computedValues
;
OpenCLParameterSet
*
computedValues
;
OpenCLParameterSet
*
energyDerivs
;
OpenCLParameterSet
*
energyDerivs
;
OpenCLParameterSet
*
energyDerivChain
;
OpenCLParameterSet
*
energyDerivChain
;
std
::
vector
<
OpenCLParameterSet
*>
dValuedParam
;
std
::
vector
<
OpenCLArray
*>
dValue0dParam
;
OpenCLArray
*
longEnergyDerivs
;
OpenCLArray
*
longEnergyDerivs
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
OpenCLArray
*
valueBuffers
;
OpenCLArray
*
valueBuffers
;
...
@@ -953,6 +962,7 @@ public:
...
@@ -953,6 +962,7 @@ public:
private:
private:
int
numGroups
,
numBonds
;
int
numGroups
,
numBonds
;
bool
needEnergyParamDerivs
;
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
...
@@ -1339,7 +1349,7 @@ public:
...
@@ -1339,7 +1349,7 @@ public:
enum
GlobalTargetType
{
DT
,
VARIABLE
,
PARAMETER
};
enum
GlobalTargetType
{
DT
,
VARIABLE
,
PARAMETER
};
OpenCLIntegrateCustomStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateCustomStepKernel
(
name
,
platform
),
cl
(
cl
),
OpenCLIntegrateCustomStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
IntegrateCustomStepKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
),
localValuesAreCurrent
(
false
),
globalValues
(
NULL
),
sumBuffer
(
NULL
),
summedValue
(
NULL
),
uniformRandoms
(
NULL
),
hasInitializedKernels
(
false
),
localValuesAreCurrent
(
false
),
globalValues
(
NULL
),
sumBuffer
(
NULL
),
summedValue
(
NULL
),
uniformRandoms
(
NULL
),
randomSeed
(
NULL
),
perDof
Values
(
NULL
)
{
randomSeed
(
NULL
),
perDof
EnergyParamDerivs
(
NULL
),
perDofValues
(
NULL
),
needsEnergyParamDerivs
(
false
)
{
}
}
~
OpenCLIntegrateCustomStepKernel
();
~
OpenCLIntegrateCustomStepKernel
();
/**
/**
...
@@ -1404,8 +1414,11 @@ public:
...
@@ -1404,8 +1414,11 @@ public:
private:
private:
class
ReorderListener
;
class
ReorderListener
;
class
GlobalTarget
;
class
GlobalTarget
;
class
DerivFunction
;
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
Lepton
::
ExpressionTreeNode
replaceDerivFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
OpenMM
::
ContextImpl
&
context
);
void
findExpressionsForDerivs
(
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variableNodes
);
void
recordGlobalValue
(
double
value
,
GlobalTarget
target
);
void
recordGlobalValue
(
double
value
,
GlobalTarget
target
);
void
recordChangedParameters
(
ContextImpl
&
context
);
void
recordChangedParameters
(
ContextImpl
&
context
);
bool
evaluateCondition
(
int
step
);
bool
evaluateCondition
(
int
step
);
...
@@ -1413,18 +1426,23 @@ private:
...
@@ -1413,18 +1426,23 @@ private:
double
energy
;
double
energy
;
float
energyFloat
;
float
energyFloat
;
int
numGlobalVariables
;
int
numGlobalVariables
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
mutable
bool
localValuesAreCurrent
;
mutable
bool
localValuesAreCurrent
;
OpenCLArray
*
globalValues
;
OpenCLArray
*
globalValues
;
OpenCLArray
*
sumBuffer
;
OpenCLArray
*
sumBuffer
;
OpenCLArray
*
summedValue
;
OpenCLArray
*
summedValue
;
OpenCLArray
*
uniformRandoms
;
OpenCLArray
*
uniformRandoms
;
OpenCLArray
*
randomSeed
;
OpenCLArray
*
randomSeed
;
OpenCLArray
*
perDofEnergyParamDerivs
;
std
::
map
<
int
,
OpenCLArray
*>
savedForces
;
std
::
map
<
int
,
OpenCLArray
*>
savedForces
;
std
::
set
<
int
>
validSavedForces
;
std
::
set
<
int
>
validSavedForces
;
OpenCLParameterSet
*
perDofValues
;
OpenCLParameterSet
*
perDofValues
;
mutable
std
::
vector
<
std
::
vector
<
cl_float
>
>
localPerDofValuesFloat
;
mutable
std
::
vector
<
std
::
vector
<
cl_float
>
>
localPerDofValuesFloat
;
mutable
std
::
vector
<
std
::
vector
<
cl_double
>
>
localPerDofValuesDouble
;
mutable
std
::
vector
<
std
::
vector
<
cl_double
>
>
localPerDofValuesDouble
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivs
;
std
::
vector
<
std
::
string
>
perDofEnergyParamDerivNames
;
std
::
vector
<
cl_float
>
localPerDofEnergyParamDerivsFloat
;
std
::
vector
<
cl_double
>
localPerDofEnergyParamDerivsDouble
;
std
::
vector
<
float
>
globalValuesFloat
;
std
::
vector
<
float
>
globalValuesFloat
;
std
::
vector
<
double
>
globalValuesDouble
;
std
::
vector
<
double
>
globalValuesDouble
;
std
::
vector
<
double
>
initialGlobalVariables
;
std
::
vector
<
double
>
initialGlobalVariables
;
...
...
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
a381a3ab
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -88,6 +88,15 @@ public:
...
@@ -88,6 +88,15 @@ public:
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*/
*/
void
addArgument
(
const
ParameterInfo
&
parameter
);
void
addArgument
(
const
ParameterInfo
&
parameter
);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std
::
string
addEnergyParameterDerivative
(
const
std
::
string
&
param
);
/**
/**
* Specify the list of exclusions that an interaction outside the default kernel will depend on.
* Specify the list of exclusions that an interaction outside the default kernel will depend on.
*
*
...
@@ -281,9 +290,13 @@ private:
...
@@ -281,9 +290,13 @@ private:
OpenCLArray
*
oldPositions
;
OpenCLArray
*
oldPositions
;
OpenCLArray
*
rebuildNeighborList
;
OpenCLArray
*
rebuildNeighborList
;
OpenCLSort
*
blockSorter
;
OpenCLSort
*
blockSorter
;
cl
::
Event
downloadCountEvent
;
cl
::
Buffer
*
pinnedCountBuffer
;
int
*
pinnedCountMemory
;
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
arguments
;
std
::
vector
<
ParameterInfo
>
arguments
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
std
::
map
<
int
,
double
>
groupCutoff
;
std
::
map
<
int
,
double
>
groupCutoff
;
std
::
map
<
int
,
std
::
string
>
groupKernelSource
;
std
::
map
<
int
,
std
::
string
>
groupKernelSource
;
double
lastCutoff
;
double
lastCutoff
;
...
...
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
a381a3ab
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -56,12 +56,25 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
...
@@ -56,12 +56,25 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
}
}
}
}
std
::
string
OpenCLBondedUtilities
::
addArgument
(
cl
::
Memory
&
data
,
const
string
&
type
)
{
string
OpenCLBondedUtilities
::
addArgument
(
cl
::
Memory
&
data
,
const
string
&
type
)
{
arguments
.
push_back
(
&
data
);
arguments
.
push_back
(
&
data
);
argTypes
.
push_back
(
type
);
argTypes
.
push_back
(
type
);
return
"customArg"
+
context
.
intToString
(
arguments
.
size
());
return
"customArg"
+
context
.
intToString
(
arguments
.
size
());
}
}
string
OpenCLBondedUtilities
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
// See if the parameter has already been added.
int
index
;
for
(
index
=
0
;
index
<
energyParameterDerivatives
.
size
();
index
++
)
if
(
param
==
energyParameterDerivatives
[
index
])
break
;
if
(
index
==
energyParameterDerivatives
.
size
())
energyParameterDerivatives
.
push_back
(
param
);
context
.
addEnergyParameterDerivative
(
param
);
return
string
(
"energyParamDeriv"
)
+
context
.
intToString
(
index
);
}
void
OpenCLBondedUtilities
::
addPrefixCode
(
const
string
&
source
)
{
void
OpenCLBondedUtilities
::
addPrefixCode
(
const
string
&
source
)
{
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
if
(
prefixCode
[
i
]
==
source
)
if
(
prefixCode
[
i
]
==
source
)
...
@@ -190,13 +203,23 @@ void OpenCLBondedUtilities::initialize(const System& system) {
...
@@ -190,13 +203,23 @@ void OpenCLBondedUtilities::initialize(const System& system) {
}
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
if
(
energyParameterDerivatives
.
size
()
>
0
)
s
<<
", __global mixed* restrict energyParamDerivs"
;
s
<<
") {
\n
"
;
s
<<
") {
\n
"
;
s
<<
"mixed energy = 0;
\n
"
;
s
<<
"mixed energy = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
s
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
int
force
=
set
[
i
];
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
}
}
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
const
vector
<
string
>&
allParamDerivNames
=
context
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
for
(
int
index
=
0
;
index
<
numDerivs
;
index
++
)
if
(
allParamDerivNames
[
index
]
==
energyParameterDerivatives
[
i
])
s
<<
"energyParamDerivs[get_global_id(0)*"
<<
numDerivs
<<
"+"
<<
index
<<
"] += energyParamDeriv"
<<
i
<<
";
\n
"
;
s
<<
"}
\n
"
;
s
<<
"}
\n
"
;
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
...
@@ -274,6 +297,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
...
@@ -274,6 +297,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
}
}
for
(
int
j
=
0
;
j
<
(
int
)
arguments
.
size
();
j
++
)
for
(
int
j
=
0
;
j
<
(
int
)
arguments
.
size
();
j
++
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
*
arguments
[
j
]);
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
*
arguments
[
j
]);
if
(
energyParameterDerivatives
.
size
()
>
0
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
context
.
getEnergyParamDerivBuffer
().
getDeviceBuffer
());
}
}
}
}
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
{
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
a381a3ab
...
@@ -69,7 +69,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
...
@@ -69,7 +69,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useDoublePrecision
=
false
;
...
@@ -179,10 +179,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -179,10 +179,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
ThreadBlockSize
);
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
ThreadBlockSize
);
if
(
platformVendor
.
size
()
>=
5
&&
platformVendor
.
substr
(
0
,
5
)
==
"Intel"
)
if
(
platformVendor
.
size
()
>=
5
&&
platformVendor
.
substr
(
0
,
5
)
==
"Intel"
)
defaultOptimizationOptions
=
""
;
defaultOptimizationOptions
=
""
;
else
if
(
platformVendor
==
"Apple"
)
defaultOptimizationOptions
=
"-cl-mad-enable -cl-no-signed-zeros"
;
else
else
defaultOptimizationOptions
=
"-cl-
fast-relaxed-math
"
;
defaultOptimizationOptions
=
"-cl-
mad-enable -cl-no-signed-zeros
"
;
supports64BitGlobalAtomics
=
(
device
.
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_khr_int64_base_atomics"
)
!=
string
::
npos
);
supports64BitGlobalAtomics
=
(
device
.
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_khr_int64_base_atomics"
)
!=
string
::
npos
);
supportsDoublePrecision
=
(
device
.
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_khr_fp64"
)
!=
string
::
npos
);
supportsDoublePrecision
=
(
device
.
getInfo
<
CL_DEVICE_EXTENSIONS
>
().
find
(
"cl_khr_fp64"
)
!=
string
::
npos
);
if
((
useDoublePrecision
||
useMixedPrecision
)
&&
!
supportsDoublePrecision
)
if
((
useDoublePrecision
||
useMixedPrecision
)
&&
!
supportsDoublePrecision
)
...
@@ -437,6 +435,8 @@ OpenCLContext::~OpenCLContext() {
...
@@ -437,6 +435,8 @@ OpenCLContext::~OpenCLContext() {
delete
longForceBuffer
;
delete
longForceBuffer
;
if
(
energyBuffer
!=
NULL
)
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
delete
energyBuffer
;
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
if
(
atomIndexDevice
!=
NULL
)
delete
atomIndexDevice
;
delete
atomIndexDevice
;
if
(
integration
!=
NULL
)
if
(
integration
!=
NULL
)
...
@@ -457,15 +457,16 @@ void OpenCLContext::initialize() {
...
@@ -457,15 +457,16 @@ void OpenCLContext::initialize() {
numForceBuffers
=
std
::
max
(
numForceBuffers
,
bonded
->
getNumForceBuffers
());
numForceBuffers
=
std
::
max
(
numForceBuffers
,
bonded
->
getNumForceBuffers
());
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
();
i
++
)
numForceBuffers
=
std
::
max
(
numForceBuffers
,
forces
[
i
]
->
getRequiredForceBuffers
());
numForceBuffers
=
std
::
max
(
numForceBuffers
,
forces
[
i
]
->
getRequiredForceBuffers
());
int
energyBufferSize
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
forceBuffers
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumE
nergyBuffer
s
())
,
"energyBuffer"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
e
nergyBuffer
Size
,
"energyBuffer"
);
}
}
else
{
else
{
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumE
nergyBuffer
s
())
,
"energyBuffer"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
e
nergyBuffer
Size
,
"energyBuffer"
);
}
}
if
(
supports64BitGlobalAtomics
)
{
if
(
supports64BitGlobalAtomics
)
{
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
...
@@ -477,7 +478,15 @@ void OpenCLContext::initialize() {
...
@@ -477,7 +478,15 @@ void OpenCLContext::initialize() {
}
}
addAutoclearBuffer
(
*
forceBuffers
);
addAutoclearBuffer
(
*
forceBuffers
);
addAutoclearBuffer
(
*
energyBuffer
);
addAutoclearBuffer
(
*
energyBuffer
);
int
bufferBytes
=
max
(
velm
->
getSize
()
*
velm
->
getElementSize
(),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
());
int
numEnergyParamDerivs
=
energyParamDerivNames
.
size
();
if
(
numEnergyParamDerivs
>
0
)
{
if
(
useDoublePrecision
||
useMixedPrecision
)
energyParamDerivBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
else
energyParamDerivBuffer
=
OpenCLArray
::
create
<
cl_float
>
(
*
this
,
numEnergyParamDerivs
*
energyBufferSize
,
"energyParamDerivBuffer"
);
addAutoclearBuffer
(
*
energyParamDerivBuffer
);
}
int
bufferBytes
=
max
(
velm
->
getSize
()
*
velm
->
getElementSize
(),
energyBufferSize
*
energyBuffer
->
getElementSize
());
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedBuffer
=
new
cl
::
Buffer
(
context
,
CL_MEM_ALLOC_HOST_PTR
,
bufferBytes
);
pinnedMemory
=
currentQueue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
pinnedMemory
=
currentQueue
.
enqueueMapBuffer
(
*
pinnedBuffer
,
CL_TRUE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
0
,
bufferBytes
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
...
@@ -1052,7 +1061,6 @@ void OpenCLContext::reorderAtoms() {
...
@@ -1052,7 +1061,6 @@ void OpenCLContext::reorderAtoms() {
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_double
,
mm_double4
>
();
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_double
,
mm_double4
>
();
else
else
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_float
,
mm_float4
>
();
reorderAtomsImpl
<
cl_float
,
mm_float4
,
cl_float
,
mm_float4
>
();
nonbonded
->
updateNeighborListSize
();
}
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
...
@@ -1232,6 +1240,15 @@ void OpenCLContext::addPostComputation(ForcePostComputation* computation) {
...
@@ -1232,6 +1240,15 @@ void OpenCLContext::addPostComputation(ForcePostComputation* computation) {
postComputations
.
push_back
(
computation
);
postComputations
.
push_back
(
computation
);
}
}
void
OpenCLContext
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
// See if this parameter has already been registered.
for
(
int
i
=
0
;
i
<
energyParamDerivNames
.
size
();
i
++
)
if
(
param
==
energyParamDerivNames
[
i
])
return
;
energyParamDerivNames
.
push_back
(
param
);
}
struct
OpenCLContext
::
WorkThread
::
ThreadData
{
struct
OpenCLContext
::
WorkThread
::
ThreadData
{
ThreadData
(
std
::
queue
<
OpenCLContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
ThreadData
(
std
::
queue
<
OpenCLContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
...
...
platforms/opencl/src/OpenCLExpressionUtilities.cpp
View file @
a381a3ab
...
@@ -174,8 +174,8 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -174,8 +174,8 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"if (x >= "
<<
paramsFloat
[
2
]
<<
" && x <= "
<<
paramsFloat
[
3
]
<<
" && y >= "
<<
paramsFloat
[
4
]
<<
" && y <= "
<<
paramsFloat
[
5
]
<<
") {
\n
"
;
out
<<
"if (x >= "
<<
paramsFloat
[
2
]
<<
" && x <= "
<<
paramsFloat
[
3
]
<<
" && y >= "
<<
paramsFloat
[
4
]
<<
" && y <= "
<<
paramsFloat
[
5
]
<<
") {
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
2
]
<<
")*"
<<
paramsFloat
[
6
]
<<
";
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
2
]
<<
")*"
<<
paramsFloat
[
6
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
4
]
<<
")*"
<<
paramsFloat
[
7
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
4
]
<<
")*"
<<
paramsFloat
[
7
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 4*(s+"
<<
paramsInt
[
0
]
<<
"*t);
\n
"
;
out
<<
"int coeffIndex = 4*(s+"
<<
paramsInt
[
0
]
<<
"*t);
\n
"
;
out
<<
"float4 c[4];
\n
"
;
out
<<
"float4 c[4];
\n
"
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
@@ -217,9 +217,9 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
...
@@ -217,9 +217,9 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out
<<
"x = (x - "
<<
paramsFloat
[
3
]
<<
")*"
<<
paramsFloat
[
9
]
<<
";
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
3
]
<<
")*"
<<
paramsFloat
[
9
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
5
]
<<
")*"
<<
paramsFloat
[
10
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
5
]
<<
")*"
<<
paramsFloat
[
10
]
<<
";
\n
"
;
out
<<
"z = (z - "
<<
paramsFloat
[
7
]
<<
")*"
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
"z = (z - "
<<
paramsFloat
[
7
]
<<
")*"
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
");
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 16*(s+"
<<
paramsInt
[
0
]
<<
"*(t+"
<<
paramsInt
[
1
]
<<
"*u));
\n
"
;
out
<<
"int coeffIndex = 16*(s+"
<<
paramsInt
[
0
]
<<
"*(t+"
<<
paramsInt
[
1
]
<<
"*u));
\n
"
;
out
<<
"float4 c[16];
\n
"
;
out
<<
"float4 c[16];
\n
"
;
for
(
int
j
=
0
;
j
<
16
;
j
++
)
for
(
int
j
=
0
;
j
<
16
;
j
++
)
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
a381a3ab
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
a381a3ab
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -57,7 +57,7 @@ private:
...
@@ -57,7 +57,7 @@ private:
OpenCLNonbondedUtilities
::
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
OpenCLNonbondedUtilities
::
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
numForceBuffers
(
0
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
numForceBuffers
(
0
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
pinnedCountMemory
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
// Decide how many thread blocks and force buffers to use.
// Decide how many thread blocks and force buffers to use.
deviceIsCpu
=
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
deviceIsCpu
=
(
context
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
...
@@ -90,6 +90,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
...
@@ -90,6 +90,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
numForceBuffers
=
numForceThreadBlocks
*
forceThreadBlockSize
/
OpenCLContext
::
TileSize
;
numForceBuffers
=
numForceThreadBlocks
*
forceThreadBlockSize
/
OpenCLContext
::
TileSize
;
}
}
}
}
pinnedCountBuffer
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_ALLOC_HOST_PTR
,
sizeof
(
int
));
pinnedCountMemory
=
(
int
*
)
context
.
getQueue
().
enqueueMapBuffer
(
*
pinnedCountBuffer
,
CL_TRUE
,
CL_MAP_READ
,
0
,
sizeof
(
int
));
}
}
OpenCLNonbondedUtilities
::~
OpenCLNonbondedUtilities
()
{
OpenCLNonbondedUtilities
::~
OpenCLNonbondedUtilities
()
{
...
@@ -123,6 +125,8 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
...
@@ -123,6 +125,8 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete
rebuildNeighborList
;
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
delete
pinnedCountBuffer
;
}
}
void
OpenCLNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
void
OpenCLNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
...
@@ -158,6 +162,19 @@ void OpenCLNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
...
@@ -158,6 +162,19 @@ void OpenCLNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments
.
push_back
(
parameter
);
arguments
.
push_back
(
parameter
);
}
}
string
OpenCLNonbondedUtilities
::
addEnergyParameterDerivative
(
const
string
&
param
)
{
// See if the parameter has already been added.
int
index
;
for
(
index
=
0
;
index
<
energyParameterDerivatives
.
size
();
index
++
)
if
(
param
==
energyParameterDerivatives
[
index
])
break
;
if
(
index
==
energyParameterDerivatives
.
size
())
energyParameterDerivatives
.
push_back
(
param
);
context
.
addEnergyParameterDerivative
(
param
);
return
string
(
"energyParamDeriv"
)
+
context
.
intToString
(
index
);
}
void
OpenCLNonbondedUtilities
::
requestExclusions
(
const
vector
<
vector
<
int
>
>&
exclusionList
)
{
void
OpenCLNonbondedUtilities
::
requestExclusions
(
const
vector
<
vector
<
int
>
>&
exclusionList
)
{
if
(
anyExclusions
)
{
if
(
anyExclusions
)
{
bool
sameExclusions
=
(
exclusionList
.
size
()
==
atomExclusions
.
size
());
bool
sameExclusions
=
(
exclusionList
.
size
()
==
atomExclusions
.
size
());
...
@@ -357,8 +374,6 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
...
@@ -357,8 +374,6 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
bool
rebuild
=
false
;
do
{
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
blockSorter
->
sort
(
*
sortedBlocks
);
...
@@ -367,10 +382,8 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
...
@@ -367,10 +382,8 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
lastCutoff
=
kernels
.
cutoffDistance
;
context
.
getQueue
().
enqueueReadBuffer
(
interactionCount
->
getDeviceBuffer
(),
CL_FALSE
,
0
,
sizeof
(
int
),
pinnedCountMemory
,
NULL
,
&
downloadCountEvent
);
}
}
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
...
@@ -385,20 +398,22 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
...
@@ -385,20 +398,22 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
if
(
useCutoff
&&
numTiles
>
0
)
{
downloadCountEvent
.
wait
();
updateNeighborListSize
();
}
}
}
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
if
(
!
useCutoff
)
return
false
;
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
if
(
pinnedCountMemory
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
return
false
;
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
// this from happening in the future.
int
maxTiles
=
(
int
)
(
1.2
*
pinned
InteractionCount
[
0
]);
int
maxTiles
=
(
int
)
(
1.2
*
pinned
CountMemory
[
0
]);
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
maxTiles
=
totalTiles
;
...
@@ -430,6 +445,7 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
...
@@ -430,6 +445,7 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
}
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
return
true
;
return
true
;
}
}
...
@@ -588,6 +604,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -588,6 +604,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
args
<<
arguments
[
i
].
getName
();
args
<<
arguments
[
i
].
getName
();
}
}
}
}
if
(
energyParameterDerivatives
.
size
()
>
0
)
args
<<
", __global mixed* restrict energyParamDerivs"
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
stringstream
loadLocal1
;
stringstream
loadLocal1
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
...
@@ -638,6 +656,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -638,6 +656,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
}
}
}
}
replacements
[
"LOAD_ATOM2_PARAMETERS"
]
=
load2j
.
str
();
replacements
[
"LOAD_ATOM2_PARAMETERS"
]
=
load2j
.
str
();
stringstream
initDerivs
;
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
initDerivs
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
replacements
[
"INIT_DERIVATIVES"
]
=
initDerivs
.
str
();
stringstream
saveDerivs
;
const
vector
<
string
>&
allParamDerivNames
=
context
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
for
(
int
index
=
0
;
index
<
numDerivs
;
index
++
)
if
(
allParamDerivNames
[
index
]
==
energyParameterDerivatives
[
i
])
saveDerivs
<<
"energyParamDerivs[get_global_id(0)*"
<<
numDerivs
<<
"+"
<<
index
<<
"] += energyParamDeriv"
<<
i
<<
";
\n
"
;
replacements
[
"SAVE_DERIVATIVES"
]
=
saveDerivs
.
str
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
if
(
useCutoff
)
if
(
useCutoff
)
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"USE_CUTOFF"
]
=
"1"
;
...
@@ -713,5 +743,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -713,5 +743,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
{
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
arguments
[
i
].
getMemory
());
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
arguments
[
i
].
getMemory
());
}
}
if
(
energyParameterDerivatives
.
size
()
>
0
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
context
.
getEnergyParamDerivBuffer
().
getDeviceBuffer
());
return
kernel
;
return
kernel
;
}
}
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
a381a3ab
...
@@ -30,6 +30,10 @@
...
@@ -30,6 +30,10 @@
tempForce
=
-prefactor*
(
erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempForce
=
-prefactor*
(
erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
-prefactor*erfAlphaR
;
tempEnergy
+=
-prefactor*erfAlphaR
;
}
}
else
{
includeInteraction
=
false
;
tempEnergy
-=
TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w
;
}
}
}
else
{
else
{
#
if
HAS_LENNARD_JONES
#
if
HAS_LENNARD_JONES
...
...
platforms/opencl/src/kernels/customCentroidBond.cl
View file @
a381a3ab
...
@@ -116,10 +116,12 @@ __kernel void computeGroupForces(__global long* restrict groupForce, __global mi
...
@@ -116,10 +116,12 @@ __kernel void computeGroupForces(__global long* restrict groupForce, __global mi
__global
const
int*
restrict
bondGroups,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
__global
const
int*
restrict
bondGroups,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
EXTRA_ARGS
)
{
EXTRA_ARGS
)
{
mixed
energy
=
0
;
mixed
energy
=
0
;
INIT_PARAM_DERIVS
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
COMPUTE_FORCE
COMPUTE_FORCE
}
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
SAVE_PARAM_DERIVS
}
}
/**
/**
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
a381a3ab
This diff is collapsed.
Click to expand it.
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
a381a3ab
This diff is collapsed.
Click to expand it.
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
View file @
a381a3ab
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
real4*
restrict
forceBuffers,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
posq
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
real4*
restrict
forceBuffers,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
posq
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
mixed
energy
=
0
;
mixed
energy
=
0
;
INIT_PARAM_DERIVS
unsigned
int
index
=
get_global_id
(
0
)
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
//
Reduce
the
derivatives
//
Reduce
the
derivatives
...
@@ -27,4 +28,5 @@ __kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global
...
@@ -27,4 +28,5 @@ __kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
}
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
SAVE_PARAM_DERIVS
}
}
Prev
1
2
3
4
5
6
7
8
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment