Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
4949017b
Commit
4949017b
authored
Jul 27, 2016
by
Peter Eastman
Browse files
Continuing CUDA implementation of parameter derivatives
parent
eae8def5
Changes
9
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
218 additions
and
49 deletions
+218
-49
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+3
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+181
-39
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+6
-0
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
+2
-0
platforms/cuda/src/kernels/customGBGradientChainRule.cu
platforms/cuda/src/kernels/customGBGradientChainRule.cu
+2
-0
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+21
-7
platforms/cuda/src/kernels/customGBValuePerParticle.cu
platforms/cuda/src/kernels/customGBValuePerParticle.cu
+1
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-1
platforms/opencl/src/kernels/customGBValueN2.cl
platforms/opencl/src/kernels/customGBValueN2.cl
+1
-1
No files found.
platforms/cuda/include/CudaKernels.h
View file @
4949017b
...
...
@@ -826,13 +826,15 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
private:
double
cutoff
;
bool
hasInitializedKernels
,
needParameterGradient
;
bool
hasInitializedKernels
,
needParameterGradient
,
needEnergyParamDerivs
;
int
maxTiles
,
numComputedValues
;
CudaContext
&
cu
;
CudaParameterSet
*
params
;
CudaParameterSet
*
computedValues
;
CudaParameterSet
*
energyDerivs
;
CudaParameterSet
*
energyDerivChain
;
std
::
vector
<
CudaParameterSet
*>
dValuedParam
;
std
::
vector
<
CudaArray
*>
dValue0dParam
;
CudaArray
*
longEnergyDerivs
;
CudaArray
*
globals
;
CudaArray
*
valueBuffers
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
4949017b
This diff is collapsed.
Click to expand it.
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
4949017b
...
...
@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
mixed
energy
=
0
;
INIT_PARAM_DERIVS
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
// First loop: process tiles that contain exclusions.
...
...
@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
y
*
TILE_SIZE
+
j
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
0.5
f
;
#ifdef USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
);
#endif
...
...
@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
y
*
TILE_SIZE
+
tj
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1
;
#ifdef USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
);
#endif
...
...
@@ -266,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
atomIndices
[
tbx
+
tj
];
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_INTERACTION
dEdR
/=
-
r
;
...
...
@@ -309,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
atomIndices
[
tbx
+
tj
];
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_INTERACTION
dEdR
/=
-
r
;
...
...
@@ -353,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
pos
++
;
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
SAVE_PARAM_DERIVS
}
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
View file @
4949017b
...
...
@@ -5,6 +5,7 @@
extern
"C"
__global__
void
computePerParticleEnergy
(
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
PARAMETER_ARGUMENTS
)
{
mixed
energy
=
0
;
INIT_PARAM_DERIVS
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Load the derivatives
...
...
@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc
COMPUTE_ENERGY
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
SAVE_PARAM_DERIVS
}
platforms/cuda/src/kernels/customGBGradientChainRule.cu
View file @
4949017b
...
...
@@ -4,6 +4,7 @@
extern
"C"
__global__
void
computeGradientChainRuleTerms
(
long
long
*
__restrict__
forceBuffers
,
const
real4
*
__restrict__
posq
PARAMETER_ARGUMENTS
)
{
INIT_PARAM_DERIVS
const
real
scale
=
RECIP
((
real
)
0x100000000
);
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
index
];
...
...
@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
(
long
long
)
(
force
.
y
*
0x100000000
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
(
long
long
)
(
force
.
z
*
0x100000000
);
}
SAVE_PARAM_DERIVS
}
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
4949017b
...
...
@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
COMPUTE_VALUE
}
value
+=
tempValue1
;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
...
...
@@ -244,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -276,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -285,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
atomicAdd
(
&
global_value
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
unsigned
int
offset1
=
atom1
;
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
#ifdef USE_CUTOFF
unsigned
int
atom2
=
atomIndices
[
threadIdx
.
x
];
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
atomicAdd
(
&
global_value
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
unsigned
int
offset2
=
atom2
;
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
pos
++
;
}
...
...
platforms/cuda/src/kernels/customGBValuePerParticle.cu
View file @
4949017b
...
...
@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
// Load the pairwise value
real
sum
=
valueBuffers
[
index
]
/
(
real
)
0x100000000
;
REDUCE_PARAM0_DERIV
// Now calculate other values
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
4949017b
...
...
@@ -3301,7 +3301,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
string variableName = "dValuedParam_0_"+cl.intToString(i);
if (useLong) {
extraArgs << ", __global const long* restrict dValue0dParam" << i;
deriv0 << "real " << variableName << " = (1.0f/0x100000000)*dValue0dParam[index];\n";
deriv0 << "real " << variableName << " = (1.0f/0x100000000)*dValue0dParam
" << i << "
[index];\n";
}
else {
extraArgs << ", __global const real* restrict dValue0dParam" << i;
...
...
platforms/opencl/src/kernels/customGBValueN2.cl
View file @
4949017b
...
...
@@ -320,7 +320,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
unsigned
int
atom2
=
y*TILE_SIZE
+
tgx
;
#
endif
#
ifdef
SUPPORTS_64_BIT_ATOMICS
unsigned
in
offset1
=
atom1
;
unsigned
in
t
offset1
=
atom1
;
atom_add
(
&global_value[offset1],
(
long
)
(
value*0x100000000
))
;
STORE_PARAM_DERIVS1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment