Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
18fb6efc
Commit
18fb6efc
authored
Jul 10, 2012
by
Peter Eastman
Browse files
Implemented mixed precision mode
parent
76716f1c
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
93 additions
and
33 deletions
+93
-33
platforms/cuda2/src/CudaKernels.cpp
platforms/cuda2/src/CudaKernels.cpp
+29
-8
platforms/cuda2/src/CudaNonbondedUtilities.cpp
platforms/cuda2/src/CudaNonbondedUtilities.cpp
+13
-1
platforms/cuda2/src/CudaPlatform.cpp
platforms/cuda2/src/CudaPlatform.cpp
+6
-6
platforms/cuda2/src/kernels/customGBEnergyN2.cu
platforms/cuda2/src/kernels/customGBEnergyN2.cu
+13
-6
platforms/cuda2/src/kernels/gbsaObc1.cu
platforms/cuda2/src/kernels/gbsaObc1.cu
+4
-2
platforms/cuda2/src/kernels/nonbonded.cu
platforms/cuda2/src/kernels/nonbonded.cu
+28
-10
No files found.
platforms/cuda2/src/CudaKernels.cpp
View file @
18fb6efc
...
@@ -1977,7 +1977,20 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
...
@@ -1977,7 +1977,20 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
nb
.
getForceThreadBlockSize
());
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
nb
.
getForceThreadBlockSize
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
gbsaObc1
,
defines
);
map
<
string
,
string
>
replacements
;
stringstream
defineAccum
;
if
(
cu
.
getAccumulateInDouble
())
{
defineAccum
<<
"typedef double accum;
\n
"
;
defineAccum
<<
"typedef double4 accum4;
\n
"
;
defines
[
"make_accum4"
]
=
"make_double4"
;
}
else
{
defineAccum
<<
"typedef real accum;
\n
"
;
defineAccum
<<
"typedef real4 accum4;
\n
"
;
defines
[
"make_accum4"
]
=
"make_real4"
;
}
replacements
[
"DEFINE_ACCUM"
]
=
defineAccum
.
str
();
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
cu
.
replaceStrings
(
CudaKernelSources
::
gbsaObc1
,
replacements
),
defines
);
computeBornSumKernel
=
cu
.
getKernel
(
module
,
"computeBornSum"
);
computeBornSumKernel
=
cu
.
getKernel
(
module
,
"computeBornSum"
);
computeSumArgs
.
push_back
(
&
bornSum
->
getDevicePointer
());
computeSumArgs
.
push_back
(
&
bornSum
->
getDevicePointer
());
computeSumArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
computeSumArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
...
@@ -2422,7 +2435,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -2422,7 +2435,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
string
n2EnergyStr
=
n2EnergySource
.
str
();
string
n2EnergyStr
=
n2EnergySource
.
str
();
replacements
[
"COMPUTE_INTERACTION"
]
=
n2EnergyStr
;
replacements
[
"COMPUTE_INTERACTION"
]
=
n2EnergyStr
;
stringstream
extraArgs
,
atomParams
,
loadLocal1
,
loadLocal2
,
clearLocal
,
load1
,
load2
,
declare1
,
recordDeriv
,
storeDerivs1
,
storeDerivs2
,
declareTemps
,
setTemps
;
stringstream
extraArgs
,
atomParams
,
loadLocal1
,
loadLocal2
,
clearLocal
,
load1
,
load2
,
declare1
,
recordDeriv
,
storeDerivs1
,
storeDerivs2
;
if
(
force
.
getNumGlobalParameters
()
>
0
)
if
(
force
.
getNumGlobalParameters
()
>
0
)
extraArgs
<<
", const float* globals"
;
extraArgs
<<
", const float* globals"
;
pairEnergyUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
pairEnergyUsesParam
.
resize
(
params
->
getBuffers
().
size
(),
false
);
...
@@ -2459,15 +2472,13 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -2459,15 +2472,13 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
extraArgs
<<
", unsigned long long* __restrict__ derivBuffers"
;
extraArgs
<<
", unsigned long long* __restrict__ derivBuffers"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
index
=
cu
.
intToString
(
i
+
1
);
string
index
=
cu
.
intToString
(
i
+
1
);
atomParams
<<
"
real
deriv"
<<
index
<<
";
\n
"
;
atomParams
<<
"
accum
deriv"
<<
index
<<
";
\n
"
;
clearLocal
<<
"localData[localAtomIndex].deriv"
<<
index
<<
" = 0;
\n
"
;
clearLocal
<<
"localData[localAtomIndex].deriv"
<<
index
<<
" = 0;
\n
"
;
declare1
<<
"
real
deriv"
<<
index
<<
"_1 = 0;
\n
"
;
declare1
<<
"
accum
deriv"
<<
index
<<
"_1 = 0;
\n
"
;
load2
<<
"real deriv"
<<
index
<<
"_2 = 0;
\n
"
;
load2
<<
"real deriv"
<<
index
<<
"_2 = 0;
\n
"
;
recordDeriv
<<
"localData[atom2].deriv"
<<
index
<<
" += deriv"
<<
index
<<
"_2;
\n
"
;
recordDeriv
<<
"localData[atom2].deriv"
<<
index
<<
" += deriv"
<<
index
<<
"_2;
\n
"
;
storeDerivs1
<<
"STORE_DERIVATIVE_1("
<<
index
<<
")
\n
"
;
storeDerivs1
<<
"STORE_DERIVATIVE_1("
<<
index
<<
")
\n
"
;
storeDerivs2
<<
"STORE_DERIVATIVE_2("
<<
index
<<
")
\n
"
;
storeDerivs2
<<
"STORE_DERIVATIVE_2("
<<
index
<<
")
\n
"
;
declareTemps
<<
"__local real tempDerivBuffer"
<<
index
<<
"[64];
\n
"
;
setTemps
<<
"tempDerivBuffer"
<<
index
<<
"[threadIdx.x] = deriv"
<<
index
<<
"_1;
\n
"
;
atomParamSize
++
;
atomParamSize
++
;
}
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
...
@@ -2481,9 +2492,19 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -2481,9 +2492,19 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
replacements
[
"RECORD_DERIVATIVE_2"
]
=
recordDeriv
.
str
();
replacements
[
"RECORD_DERIVATIVE_2"
]
=
recordDeriv
.
str
();
replacements
[
"STORE_DERIVATIVES_1"
]
=
storeDerivs1
.
str
();
replacements
[
"STORE_DERIVATIVES_1"
]
=
storeDerivs1
.
str
();
replacements
[
"STORE_DERIVATIVES_2"
]
=
storeDerivs2
.
str
();
replacements
[
"STORE_DERIVATIVES_2"
]
=
storeDerivs2
.
str
();
replacements
[
"DECLARE_TEMP_BUFFERS"
]
=
declareTemps
.
str
();
replacements
[
"SET_TEMP_BUFFERS"
]
=
setTemps
.
str
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
stringstream
defineAccum
;
if
(
cu
.
getAccumulateInDouble
())
{
defineAccum
<<
"typedef double accum;
\n
"
;
defineAccum
<<
"typedef double3 accum3;
\n
"
;
defines
[
"make_accum3"
]
=
"make_double3"
;
}
else
{
defineAccum
<<
"typedef real accum;
\n
"
;
defineAccum
<<
"typedef real3 accum3;
\n
"
;
defines
[
"make_accum3"
]
=
"make_real3"
;
}
replacements
[
"DEFINE_ACCUM"
]
=
defineAccum
.
str
();
if
(
useCutoff
)
if
(
useCutoff
)
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
usePeriodic
)
if
(
usePeriodic
)
...
...
platforms/cuda2/src/CudaNonbondedUtilities.cpp
View file @
18fb6efc
...
@@ -445,10 +445,22 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -445,10 +445,22 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
context
.
getUseDoublePrecision
())
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
context
.
getUseDoublePrecision
()
&&
!
context
.
getAccumulateInDouble
()
)
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
if
(
context
.
getComputeCapability
()
>=
3.0
&&
!
context
.
getUseDoublePrecision
())
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
stringstream
defineAccum
;
if
(
context
.
getAccumulateInDouble
())
{
defineAccum
<<
"typedef double accum;
\n
"
;
defineAccum
<<
"typedef double3 accum3;
\n
"
;
defines
[
"make_accum3"
]
=
"make_double3"
;
}
else
{
defineAccum
<<
"typedef real accum;
\n
"
;
defineAccum
<<
"typedef real3 accum3;
\n
"
;
defines
[
"make_accum3"
]
=
"make_real3"
;
}
replacements
[
"DEFINE_ACCUM"
]
=
defineAccum
.
str
();
CUmodule
program
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
context
.
replaceStrings
(
CudaKernelSources
::
nonbonded
,
replacements
),
defines
);
CUmodule
program
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
context
.
replaceStrings
(
CudaKernelSources
::
nonbonded
,
replacements
),
defines
);
CUfunction
kernel
=
context
.
getKernel
(
program
,
"computeNonbonded"
);
CUfunction
kernel
=
context
.
getKernel
(
program
,
"computeNonbonded"
);
...
...
platforms/cuda2/src/CudaPlatform.cpp
View file @
18fb6efc
...
@@ -88,12 +88,12 @@ CudaPlatform::CudaPlatform() {
...
@@ -88,12 +88,12 @@ CudaPlatform::CudaPlatform() {
#ifdef _MSC_VER
#ifdef _MSC_VER
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
int
length
=
GetShortPathName
(
nvcc
.
c_str
(),
NULL
,
0
);
int
length
=
GetShortPathName
(
nvcc
.
c_str
(),
NULL
,
0
);
if
(
length
>
0
)
{
if
(
length
>
0
)
{
vector
<
char
>
shortName
(
length
);
vector
<
char
>
shortName
(
length
);
GetShortPathName
(
nvcc
.
c_str
(),
&
shortName
[
0
],
length
);
GetShortPathName
(
nvcc
.
c_str
(),
&
shortName
[
0
],
length
);
nvcc
=
string
(
&
shortName
[
0
]);
nvcc
=
string
(
&
shortName
[
0
]);
}
}
setPropertyDefaultValue
(
CudaCompiler
(),
nvcc
);
setPropertyDefaultValue
(
CudaCompiler
(),
nvcc
);
setPropertyDefaultValue
(
CudaTempDirectory
(),
string
(
getenv
(
"TEMP"
)));
setPropertyDefaultValue
(
CudaTempDirectory
(),
string
(
getenv
(
"TEMP"
)));
#else
#else
...
...
platforms/cuda2/src/kernels/customGBEnergyN2.cu
View file @
18fb6efc
...
@@ -2,10 +2,11 @@
...
@@ -2,10 +2,11 @@
#define STORE_DERIVATIVE_2(INDEX) atomicAdd(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].deriv##INDEX*0xFFFFFFFF)));
#define STORE_DERIVATIVE_2(INDEX) atomicAdd(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].deriv##INDEX*0xFFFFFFFF)));
#define TILE_SIZE 32
#define TILE_SIZE 32
DEFINE_ACCUM
typedef
struct
{
typedef
struct
{
real4
posq
;
real4
posq
;
real
3
force
;
accum
3
force
;
ATOM_PARAMETER_DATA
ATOM_PARAMETER_DATA
#ifdef NEED_PADDING
#ifdef NEED_PADDING
float
padding
;
float
padding
;
...
@@ -46,7 +47,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -46,7 +47,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
unsigned
int
x
,
y
;
unsigned
int
x
,
y
;
real
3
force
=
make_
real
3
(
0
);
accum
3
force
=
make_
accum
3
(
0
);
DECLARE_ATOM1_DERIVATIVES
DECLARE_ATOM1_DERIVATIVES
if
(
pos
<
end
)
{
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -122,7 +123,9 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -122,7 +123,9 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
}
}
energy
+=
0.5
f
*
tempEnergy
;
energy
+=
0.5
f
*
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
-=
delta
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
}
}
#endif
#endif
...
@@ -140,7 +143,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -140,7 +143,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
}
localData
[
localAtomIndex
].
force
=
make_
real
3
(
0
);
localData
[
localAtomIndex
].
force
=
make_
accum
3
(
0
);
CLEAR_LOCAL_DERIVATIVES
CLEAR_LOCAL_DERIVATIVES
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags
[
pos
]
:
0xFFFFFFFF
);
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags
[
pos
]
:
0xFFFFFFFF
);
...
@@ -185,9 +188,13 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -185,9 +188,13 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
}
}
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
-=
delta
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
atom2
=
tbx
+
tj
;
atom2
=
tbx
+
tj
;
localData
[
atom2
].
force
+=
delta
;
localData
[
atom2
].
force
.
x
+=
delta
.
x
;
localData
[
atom2
].
force
.
y
+=
delta
.
y
;
localData
[
atom2
].
force
.
z
+=
delta
.
z
;
RECORD_DERIVATIVE_2
RECORD_DERIVATIVE_2
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
}
}
...
...
platforms/cuda2/src/kernels/gbsaObc1.cu
View file @
18fb6efc
...
@@ -4,6 +4,8 @@
...
@@ -4,6 +4,8 @@
#define TILE_SIZE 32
#define TILE_SIZE 32
#define WARPS_PER_GROUP (FORCE_WORK_GROUP_SIZE/TILE_SIZE)
#define WARPS_PER_GROUP (FORCE_WORK_GROUP_SIZE/TILE_SIZE)
DEFINE_ACCUM
/**
/**
* Reduce the Born sums to compute the Born radii.
* Reduce the Born sums to compute the Born radii.
*/
*/
...
@@ -331,7 +333,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -331,7 +333,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
typedef
struct
{
typedef
struct
{
real
x
,
y
,
z
;
real
x
,
y
,
z
;
real
q
;
real
q
;
real
fx
,
fy
,
fz
,
fw
;
accum
fx
,
fy
,
fz
,
fw
;
real
bornRadius
;
real
bornRadius
;
}
AtomData2
;
}
AtomData2
;
...
@@ -372,7 +374,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -372,7 +374,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
unsigned
int
x
,
y
;
unsigned
int
x
,
y
;
real
4
force
=
make_
real
4
(
0
);
accum
4
force
=
make_
accum
4
(
0
);
if
(
pos
<
end
)
{
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
if
(
numTiles
<=
maxTiles
)
{
...
...
platforms/cuda2/src/kernels/nonbonded.cu
View file @
18fb6efc
#define TILE_SIZE 32
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
DEFINE_ACCUM
typedef
struct
{
typedef
struct
{
real
x
,
y
,
z
;
real
x
,
y
,
z
;
real
q
;
real
q
;
real
fx
,
fy
,
fz
;
accum
fx
,
fy
,
fz
;
ATOM_PARAMETER_DATA
ATOM_PARAMETER_DATA
#ifndef PARAMETER_SIZE_IS_EVEN
#ifndef PARAMETER_SIZE_IS_EVEN
real
padding
;
real
padding
;
...
@@ -47,7 +49,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -47,7 +49,7 @@ extern "C" __global__ void computeNonbonded(
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
unsigned
int
x
,
y
;
unsigned
int
x
,
y
;
real
3
force
=
make_
real
3
(
0
);
accum
3
force
=
make_
accum
3
(
0
);
if
(
pos
<
end
)
{
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
if
(
numTiles
<=
maxTiles
)
{
...
@@ -124,9 +126,13 @@ extern "C" __global__ void computeNonbonded(
...
@@ -124,9 +126,13 @@ extern "C" __global__ void computeNonbonded(
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
0.5
f
*
tempEnergy
;
energy
+=
0.5
f
*
tempEnergy
;
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
force
-=
delta
*
dEdR
;
force
.
x
-=
delta
.
x
*
dEdR
;
force
.
y
-=
delta
.
y
*
dEdR
;
force
.
z
-=
delta
.
z
*
dEdR
;
#else
#else
force
-=
dEdR1
;
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
#endif
#endif
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
excl
>>=
1
;
...
@@ -191,7 +197,9 @@ extern "C" __global__ void computeNonbonded(
...
@@ -191,7 +197,9 @@ extern "C" __global__ void computeNonbonded(
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
delta
*=
dEdR
;
force
-=
delta
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
for
(
int
i
=
16
;
i
>=
1
;
i
/=
2
)
{
for
(
int
i
=
16
;
i
>=
1
;
i
/=
2
)
{
delta
.
x
+=
__shfl_xor
(
delta
.
x
,
i
,
32
);
delta
.
x
+=
__shfl_xor
(
delta
.
x
,
i
,
32
);
delta
.
y
+=
__shfl_xor
(
delta
.
y
,
i
,
32
);
delta
.
y
+=
__shfl_xor
(
delta
.
y
,
i
,
32
);
...
@@ -203,7 +211,9 @@ extern "C" __global__ void computeNonbonded(
...
@@ -203,7 +211,9 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
j
].
fz
+=
delta
.
z
;
localData
[
tbx
+
j
].
fz
+=
delta
.
z
;
}
}
#else
#else
force
-=
dEdR1
;
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
for
(
int
i
=
16
;
i
>=
1
;
i
/=
2
)
{
for
(
int
i
=
16
;
i
>=
1
;
i
/=
2
)
{
dEdR2
.
x
+=
__shfl_xor
(
dEdR2
.
x
,
i
,
32
);
dEdR2
.
x
+=
__shfl_xor
(
dEdR2
.
x
,
i
,
32
);
dEdR2
.
y
+=
__shfl_xor
(
dEdR2
.
y
,
i
,
32
);
dEdR2
.
y
+=
__shfl_xor
(
dEdR2
.
y
,
i
,
32
);
...
@@ -218,12 +228,16 @@ extern "C" __global__ void computeNonbonded(
...
@@ -218,12 +228,16 @@ extern "C" __global__ void computeNonbonded(
#else
#else
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
delta
*=
dEdR
;
force
-=
delta
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
tempBuffer
[
bufferIndex
]
=
delta
.
x
;
tempBuffer
[
bufferIndex
]
=
delta
.
x
;
tempBuffer
[
bufferIndex
+
1
]
=
delta
.
y
;
tempBuffer
[
bufferIndex
+
1
]
=
delta
.
y
;
tempBuffer
[
bufferIndex
+
2
]
=
delta
.
z
;
tempBuffer
[
bufferIndex
+
2
]
=
delta
.
z
;
#else
#else
force
-=
dEdR1
;
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
tempBuffer
[
bufferIndex
]
=
dEdR2
.
x
;
tempBuffer
[
bufferIndex
]
=
dEdR2
.
x
;
tempBuffer
[
bufferIndex
+
1
]
=
dEdR2
.
y
;
tempBuffer
[
bufferIndex
+
1
]
=
dEdR2
.
y
;
tempBuffer
[
bufferIndex
+
2
]
=
dEdR2
.
z
;
tempBuffer
[
bufferIndex
+
2
]
=
dEdR2
.
z
;
...
@@ -287,12 +301,16 @@ extern "C" __global__ void computeNonbonded(
...
@@ -287,12 +301,16 @@ extern "C" __global__ void computeNonbonded(
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
delta
*=
dEdR
;
force
-=
delta
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
#else
#else
force
-=
dEdR1
;
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment