Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
4949017b
"openmmapi/vscode:/vscode.git/clone" did not exist on "b3be7aecfd3b98a68c503e333bf6f9ea7b41abee"
Commit
4949017b
authored
Jul 27, 2016
by
Peter Eastman
Browse files
Continuing CUDA implementation of parameter derivatives
parent
eae8def5
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
218 additions
and
49 deletions
+218
-49
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+3
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+181
-39
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+6
-0
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
+2
-0
platforms/cuda/src/kernels/customGBGradientChainRule.cu
platforms/cuda/src/kernels/customGBGradientChainRule.cu
+2
-0
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+21
-7
platforms/cuda/src/kernels/customGBValuePerParticle.cu
platforms/cuda/src/kernels/customGBValuePerParticle.cu
+1
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-1
platforms/opencl/src/kernels/customGBValueN2.cl
platforms/opencl/src/kernels/customGBValueN2.cl
+1
-1
No files found.
platforms/cuda/include/CudaKernels.h
View file @
4949017b
...
...
@@ -826,13 +826,15 @@ public:
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
private:
double
cutoff
;
bool
hasInitializedKernels
,
needParameterGradient
;
bool
hasInitializedKernels
,
needParameterGradient
,
needEnergyParamDerivs
;
int
maxTiles
,
numComputedValues
;
CudaContext
&
cu
;
CudaParameterSet
*
params
;
CudaParameterSet
*
computedValues
;
CudaParameterSet
*
energyDerivs
;
CudaParameterSet
*
energyDerivChain
;
std
::
vector
<
CudaParameterSet
*>
dValuedParam
;
std
::
vector
<
CudaArray
*>
dValue0dParam
;
CudaArray
*
longEnergyDerivs
;
CudaArray
*
globals
;
CudaArray
*
valueBuffers
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
4949017b
This diff is collapsed.
Click to expand it.
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
4949017b
...
...
@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
mixed
energy
=
0
;
INIT_PARAM_DERIVS
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
// First loop: process tiles that contain exclusions.
...
...
@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
y
*
TILE_SIZE
+
j
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
0.5
f
;
#ifdef USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
);
#endif
...
...
@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
y
*
TILE_SIZE
+
tj
;
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1
;
#ifdef USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
);
#endif
...
...
@@ -266,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
atomIndices
[
tbx
+
tj
];
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_INTERACTION
dEdR
/=
-
r
;
...
...
@@ -309,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2
=
atomIndices
[
tbx
+
tj
];
real
dEdR
=
0
;
real
tempEnergy
=
0
;
const
real
interactionScale
=
1
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_INTERACTION
dEdR
/=
-
r
;
...
...
@@ -353,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
pos
++
;
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
SAVE_PARAM_DERIVS
}
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
View file @
4949017b
...
...
@@ -5,6 +5,7 @@
extern
"C"
__global__
void
computePerParticleEnergy
(
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
PARAMETER_ARGUMENTS
)
{
mixed
energy
=
0
;
INIT_PARAM_DERIVS
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Load the derivatives
...
...
@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc
COMPUTE_ENERGY
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
SAVE_PARAM_DERIVS
}
platforms/cuda/src/kernels/customGBGradientChainRule.cu
View file @
4949017b
...
...
@@ -4,6 +4,7 @@
extern
"C"
__global__
void
computeGradientChainRuleTerms
(
long
long
*
__restrict__
forceBuffers
,
const
real4
*
__restrict__
posq
PARAMETER_ARGUMENTS
)
{
INIT_PARAM_DERIVS
const
real
scale
=
RECIP
((
real
)
0x100000000
);
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
index
];
...
...
@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
(
long
long
)
(
force
.
y
*
0x100000000
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
(
long
long
)
(
force
.
z
*
0x100000000
);
}
SAVE_PARAM_DERIVS
}
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
4949017b
...
...
@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
COMPUTE_VALUE
}
value
+=
tempValue1
;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
...
...
@@ -244,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -276,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value
+=
tempValue1
;
localData
[
tbx
+
tj
].
value
+=
tempValue2
;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -285,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
atomicAdd
(
&
global_value
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
unsigned
int
offset1
=
atom1
;
atomicAdd
(
&
global_value
[
offset1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
value
*
0x100000000
)));
STORE_PARAM_DERIVS1
#ifdef USE_CUTOFF
unsigned
int
atom2
=
atomIndices
[
threadIdx
.
x
];
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
atomicAdd
(
&
global_value
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
unsigned
int
offset2
=
atom2
;
atomicAdd
(
&
global_value
[
offset2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
value
*
0x100000000
)));
STORE_PARAM_DERIVS2
}
}
pos
++
;
}
...
...
platforms/cuda/src/kernels/customGBValuePerParticle.cu
View file @
4949017b
...
...
@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
// Load the pairwise value
real
sum
=
valueBuffers
[
index
]
/
(
real
)
0x100000000
;
REDUCE_PARAM0_DERIV
// Now calculate other values
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
4949017b
...
...
@@ -3301,7 +3301,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
string variableName = "dValuedParam_0_"+cl.intToString(i);
if (useLong) {
extraArgs << ", __global const long* restrict dValue0dParam" << i;
deriv0
<<
"real "
<<
variableName
<<
" = (1.0f/0x100000000)*dValue0dParam[index];
\n
"
;
deriv0 << "real " << variableName << " = (1.0f/0x100000000)*dValue0dParam
" << i << "
[index];\n";
}
else {
extraArgs << ", __global const real* restrict dValue0dParam" << i;
...
...
platforms/opencl/src/kernels/customGBValueN2.cl
View file @
4949017b
...
...
@@ -320,7 +320,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
unsigned
int
atom2
=
y*TILE_SIZE
+
tgx
;
#
endif
#
ifdef
SUPPORTS_64_BIT_ATOMICS
unsigned
in
offset1
=
atom1
;
unsigned
in
t
offset1
=
atom1
;
atom_add
(
&global_value[offset1],
(
long
)
(
value*0x100000000
))
;
STORE_PARAM_DERIVS1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment