Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ff0a69f9
Commit
ff0a69f9
authored
Mar 24, 2009
by
Peter Eastman
Browse files
Deleted lots of obsolete code.
parent
a56ce87d
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
61 additions
and
2141 deletions
+61
-2141
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+0
-7
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+0
-1420
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+0
-78
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
+0
-12
platforms/cuda/src/kernels/kCalculateObcGbsaForces1.cu
platforms/cuda/src/kernels/kCalculateObcGbsaForces1.cu
+0
-399
platforms/cuda/src/kernels/kCalculateObcGbsaForces1_12.cu
platforms/cuda/src/kernels/kCalculateObcGbsaForces1_12.cu
+0
-225
platforms/cuda/src/kernels/kForces.cu
platforms/cuda/src/kernels/kForces.cu
+61
-0
No files found.
platforms/cuda/src/kernels/cudaKernels.h
View file @
ff0a69f9
...
@@ -39,10 +39,7 @@ extern void kGenerateRandoms(gpuContext gpu);
...
@@ -39,10 +39,7 @@ extern void kGenerateRandoms(gpuContext gpu);
// Main loop
// Main loop
extern
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
);
extern
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
);
extern
void
kCalculateCDLJObcGbsaForces1_12
(
gpuContext
gpu
);
extern
void
kCalculateCDLJForces
(
gpuContext
gpu
);
extern
void
kCalculateCDLJForces
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaForces1
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaForces1_12
(
gpuContext
gpu
);
extern
void
kReduceObcGbsaBornForces
(
gpuContext
gpu
);
extern
void
kReduceObcGbsaBornForces
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaForces2
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaForces2
(
gpuContext
gpu
);
extern
void
kCalculateLocalForces
(
gpuContext
gpu
);
extern
void
kCalculateLocalForces
(
gpuContext
gpu
);
...
@@ -72,10 +69,6 @@ extern void SetCalculateLocalForcesSim(gpuContext gpu);
...
@@ -72,10 +69,6 @@ extern void SetCalculateLocalForcesSim(gpuContext gpu);
extern
void
GetCalculateLocalForcesSim
(
gpuContext
gpu
);
extern
void
GetCalculateLocalForcesSim
(
gpuContext
gpu
);
extern
void
SetCalculateObcGbsaBornSumSim
(
gpuContext
gpu
);
extern
void
SetCalculateObcGbsaBornSumSim
(
gpuContext
gpu
);
extern
void
GetCalculateObcGbsaBornSumSim
(
gpuContext
gpu
);
extern
void
GetCalculateObcGbsaBornSumSim
(
gpuContext
gpu
);
extern
void
SetCalculateObcGbsaForces1Sim
(
gpuContext
gpu
);
extern
void
GetCalculateObcGbsaForces1Sim
(
gpuContext
gpu
);
extern
void
SetCalculateObcGbsaForces1_12Sim
(
gpuContext
gpu
);
extern
void
GetCalculateObcGbsaForces1_12Sim
(
gpuContext
gpu
);
extern
void
SetCalculateObcGbsaForces2Sim
(
gpuContext
gpu
);
extern
void
SetCalculateObcGbsaForces2Sim
(
gpuContext
gpu
);
extern
void
GetCalculateObcGbsaForces2Sim
(
gpuContext
gpu
);
extern
void
GetCalculateObcGbsaForces2Sim
(
gpuContext
gpu
);
extern
void
SetCalculateAndersenThermostatSim
(
gpuContext
gpu
);
extern
void
SetCalculateAndersenThermostatSim
(
gpuContext
gpu
);
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
ff0a69f9
...
@@ -55,78 +55,6 @@ using namespace std;
...
@@ -55,78 +55,6 @@ using namespace std;
using
OpenMM
::
OpenMMException
;
using
OpenMM
::
OpenMMException
;
#ifdef WIN32
typedef
unsigned
__int64
u64
;
typedef
signed
__int64
s64
;
#else
typedef
uint64_t
u64
;
typedef
int64_t
s64
;
#endif
typedef
unsigned
int
u32
;
typedef
float
f32
;
typedef
double
f64
;
typedef
char
ascii
;
typedef
char
utf8
;
typedef
unsigned
char
u8
;
typedef
signed
char
s8
;
typedef
unsigned
short
u16
;
typedef
signed
short
s16
;
typedef
struct
{
u8
type
[
4
];
f32
charge
;
f32
radius
;
}
FAH_ATOM
;
typedef
struct
{
u32
a
;
/* rule: a < b */
u32
b
;
}
FAH_BOND
;
typedef
struct
{
f32
x
;
f32
y
;
f32
z
;
}
FAH_XYZ
;
typedef
struct
{
u32
magic
;
u32
version
;
utf8
name
[
64
];
s64
timestamp
;
u64
iterations
;
u32
frames
;
u32
atom_count
;
u32
bond_count
;
/* v2 */
utf8
user_name
[
64
];
utf8
user_team
[
16
];
utf8
user_done
[
16
];
}
FAH_INFO
;
typedef
struct
{
u32
magic
;
u32
version
;
s64
timestamp
;
u64
iterations_done
;
u32
frames_done
;
f32
energy
;
f32
temperature
;
}
FAH_CURRENT
;
typedef
struct
{
FAH_INFO
info
;
FAH_CURRENT
current
;
FAH_ATOM
*
atoms
;
FAH_BOND
*
bonds
;
FAH_XYZ
*
xyz
;
}
PROTEIN
;
struct
ShakeCluster
{
struct
ShakeCluster
{
int
centralID
;
int
centralID
;
int
peripheralID
[
3
];
int
peripheralID
[
3
];
...
@@ -196,43 +124,6 @@ static const float BOLTZ = (RGAS / KILO); // (k
...
@@ -196,43 +124,6 @@ static const float BOLTZ = (RGAS / KILO); // (k
#define DeltaShake
#define DeltaShake
extern
"C"
int
gpuReadBondParameters
(
gpuContext
gpu
,
char
*
fname
)
{
ifstream
infile
(
fname
);
if
(
!
infile
.
fail
())
{
char
buff
[
512
];
int
bonds
;
infile
>>
bonds
;
infile
.
getline
(
buff
,
512
);
vector
<
int
>
atom1
(
bonds
);
vector
<
int
>
atom2
(
bonds
);
vector
<
float
>
length
(
bonds
);
vector
<
float
>
k
(
bonds
);
for
(
int
i
=
0
;
i
<
bonds
;
i
++
)
{
int
junk
;
infile
>>
junk
>>
atom1
[
i
]
>>
atom2
[
i
]
>>
length
[
i
]
>>
k
[
i
];
}
gpuSetBondParameters
(
gpu
,
atom1
,
atom2
,
length
,
k
);
return
bonds
;
}
else
{
cout
<<
"Error opening harmonic bond parameter file "
<<
fname
<<
endl
;
exit
(
-
1
);
}
return
0
;
}
extern
"C"
extern
"C"
void
gpuSetBondParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
length
,
const
vector
<
float
>&
k
)
void
gpuSetBondParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
length
,
const
vector
<
float
>&
k
)
{
{
...
@@ -268,45 +159,6 @@ void gpuSetBondParameters(gpuContext gpu, const vector<int>& atom1, const vector
...
@@ -268,45 +159,6 @@ void gpuSetBondParameters(gpuContext gpu, const vector<int>& atom1, const vector
psBondParameter
->
Upload
();
psBondParameter
->
Upload
();
}
}
extern
"C"
int
gpuReadBondAngleParameters
(
gpuContext
gpu
,
char
*
fname
)
{
ifstream
infile
(
fname
);
if
(
!
infile
.
fail
())
{
char
buff
[
512
];
int
bond_angles
;
infile
>>
bond_angles
;
infile
.
getline
(
buff
,
512
);
vector
<
int
>
atom1
(
bond_angles
);
vector
<
int
>
atom2
(
bond_angles
);
vector
<
int
>
atom3
(
bond_angles
);
vector
<
float
>
angle
(
bond_angles
);
vector
<
float
>
k
(
bond_angles
);
for
(
int
i
=
0
;
i
<
bond_angles
;
i
++
)
{
int
junk
;
infile
>>
junk
>>
atom1
[
i
]
>>
atom2
[
i
]
>>
atom3
[
i
]
>>
angle
[
i
]
>>
k
[
i
];
}
gpuSetBondAngleParameters
(
gpu
,
atom1
,
atom2
,
atom3
,
angle
,
k
);
return
bond_angles
;
}
else
{
cout
<<
"Error opening harmonic bond angle parameter file "
<<
fname
<<
endl
;
exit
(
-
1
);
}
return
0
;
}
extern
"C"
extern
"C"
void
gpuSetBondAngleParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
int
>&
atom3
,
void
gpuSetBondAngleParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
int
>&
atom3
,
const
vector
<
float
>&
angle
,
const
vector
<
float
>&
k
)
const
vector
<
float
>&
angle
,
const
vector
<
float
>&
k
)
...
@@ -352,48 +204,6 @@ void gpuSetBondAngleParameters(gpuContext gpu, const vector<int>& atom1, const v
...
@@ -352,48 +204,6 @@ void gpuSetBondAngleParameters(gpuContext gpu, const vector<int>& atom1, const v
psBondAngleParameter
->
Upload
();
psBondAngleParameter
->
Upload
();
}
}
extern
"C"
int
gpuReadDihedralParameters
(
gpuContext
gpu
,
char
*
fname
)
{
ifstream
infile
(
fname
);
if
(
!
infile
.
fail
())
{
char
buff
[
512
];
int
dihedrals
;
infile
>>
dihedrals
;
infile
.
getline
(
buff
,
512
);
vector
<
int
>
atom1
(
dihedrals
);
vector
<
int
>
atom2
(
dihedrals
);
vector
<
int
>
atom3
(
dihedrals
);
vector
<
int
>
atom4
(
dihedrals
);
vector
<
float
>
k
(
dihedrals
);
vector
<
float
>
phase
(
dihedrals
);
vector
<
int
>
periodicity
(
dihedrals
);
for
(
int
i
=
0
;
i
<
dihedrals
;
i
++
)
{
int
junk
;
infile
>>
junk
>>
atom1
[
i
]
>>
atom2
[
i
]
>>
atom3
[
i
]
>>
atom4
[
i
]
>>
k
[
i
]
>>
phase
[
i
]
>>
periodicity
[
i
];
}
gpuSetDihedralParameters
(
gpu
,
atom1
,
atom2
,
atom3
,
atom4
,
k
,
phase
,
periodicity
);
return
dihedrals
;
}
else
{
cout
<<
"Error opening dihedral parameter file "
<<
fname
<<
endl
;
exit
(
-
1
);
}
return
0
;
}
extern
"C"
extern
"C"
void
gpuSetDihedralParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
int
>&
atom3
,
const
vector
<
int
>&
atom4
,
void
gpuSetDihedralParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
int
>&
atom3
,
const
vector
<
int
>&
atom4
,
const
vector
<
float
>&
k
,
const
vector
<
float
>&
phase
,
const
vector
<
int
>&
periodicity
)
const
vector
<
float
>&
k
,
const
vector
<
float
>&
phase
,
const
vector
<
int
>&
periodicity
)
...
@@ -443,68 +253,6 @@ void gpuSetDihedralParameters(gpuContext gpu, const vector<int>& atom1, const ve
...
@@ -443,68 +253,6 @@ void gpuSetDihedralParameters(gpuContext gpu, const vector<int>& atom1, const ve
psDihedralParameter
->
Upload
();
psDihedralParameter
->
Upload
();
}
}
extern
"C"
int
gpuReadRbDihedralParameters
(
gpuContext
gpu
,
char
*
fname
)
{
ifstream
infile
(
fname
);
if
(
!
infile
.
fail
())
{
char
buff
[
512
];
int
rb_dihedrals
;
infile
>>
rb_dihedrals
;
infile
.
getline
(
buff
,
512
);
vector
<
int
>
atom1
(
rb_dihedrals
);
vector
<
int
>
atom2
(
rb_dihedrals
);
vector
<
int
>
atom3
(
rb_dihedrals
);
vector
<
int
>
atom4
(
rb_dihedrals
);
vector
<
float
>
c0
(
rb_dihedrals
);
vector
<
float
>
c1
(
rb_dihedrals
);
vector
<
float
>
c2
(
rb_dihedrals
);
vector
<
float
>
c3
(
rb_dihedrals
);
vector
<
float
>
c4
(
rb_dihedrals
);
vector
<
float
>
c5
(
rb_dihedrals
);
gpu
->
sim
.
rb_dihedrals
=
rb_dihedrals
;
CUDAStream
<
int4
>*
psRbDihedralID1
=
new
CUDAStream
<
int4
>
(
rb_dihedrals
,
1
);
gpu
->
psRbDihedralID1
=
psRbDihedralID1
;
gpu
->
sim
.
pRbDihedralID1
=
psRbDihedralID1
->
_pDevStream
[
0
];
CUDAStream
<
int4
>*
psRbDihedralID2
=
new
CUDAStream
<
int4
>
(
rb_dihedrals
,
1
);
gpu
->
psRbDihedralID2
=
psRbDihedralID2
;
gpu
->
sim
.
pRbDihedralID2
=
psRbDihedralID2
->
_pDevStream
[
0
];
CUDAStream
<
float4
>*
psRbDihedralParameter1
=
new
CUDAStream
<
float4
>
(
rb_dihedrals
,
1
);
gpu
->
psRbDihedralParameter1
=
psRbDihedralParameter1
;
gpu
->
sim
.
pRbDihedralParameter1
=
psRbDihedralParameter1
->
_pDevStream
[
0
];
CUDAStream
<
float2
>*
psRbDihedralParameter2
=
new
CUDAStream
<
float2
>
(
rb_dihedrals
,
1
);
gpu
->
psRbDihedralParameter2
=
psRbDihedralParameter2
;
gpu
->
sim
.
pRbDihedralParameter2
=
psRbDihedralParameter2
->
_pDevStream
[
0
];
for
(
int
i
=
0
;
i
<
rb_dihedrals
;
i
++
)
{
int
junk
;
infile
>>
junk
>>
atom1
[
i
]
>>
atom2
[
i
]
>>
atom3
[
i
]
>>
atom4
[
i
]
>>
c0
[
i
]
>>
c1
[
i
]
>>
c2
[
i
]
>>
c3
[
i
]
>>
c4
[
i
]
>>
c5
[
i
];
}
gpuSetRbDihedralParameters
(
gpu
,
atom1
,
atom2
,
atom3
,
atom4
,
c0
,
c1
,
c2
,
c3
,
c4
,
c5
);
return
rb_dihedrals
;
}
else
{
cout
<<
"Error opening Ryckaert-Bellemans dihedral parameter file "
<<
fname
<<
endl
;
exit
(
-
1
);
}
return
0
;
}
extern
"C"
extern
"C"
void
gpuSetRbDihedralParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
int
>&
atom3
,
const
vector
<
int
>&
atom4
,
void
gpuSetRbDihedralParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
int
>&
atom3
,
const
vector
<
int
>&
atom4
,
const
vector
<
float
>&
c0
,
const
vector
<
float
>&
c1
,
const
vector
<
float
>&
c2
,
const
vector
<
float
>&
c3
,
const
vector
<
float
>&
c4
,
const
vector
<
float
>&
c5
)
const
vector
<
float
>&
c0
,
const
vector
<
float
>&
c1
,
const
vector
<
float
>&
c2
,
const
vector
<
float
>&
c3
,
const
vector
<
float
>&
c4
,
const
vector
<
float
>&
c5
)
...
@@ -566,56 +314,6 @@ void gpuSetRbDihedralParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -566,56 +314,6 @@ void gpuSetRbDihedralParameters(gpuContext gpu, const vector<int>& atom1, const
psRbDihedralParameter2
->
Upload
();
psRbDihedralParameter2
->
Upload
();
}
}
extern
"C"
int
gpuReadLJ14Parameters
(
gpuContext
gpu
,
char
*
fname
)
{
ifstream
infile
(
fname
);
if
(
!
infile
.
fail
())
{
char
buff
[
1024
];
float
epsfac
=
0.0
f
;
float
fudge
=
0.0
f
;
int
LJ14s
;
infile
>>
LJ14s
;
infile
.
get
(
buff
,
61
);
// cout << buff << endl;
infile
>>
epsfac
;
infile
.
get
(
buff
,
8
);
infile
>>
fudge
;
infile
.
getline
(
buff
,
512
);
// cout << buff << endl;
vector
<
int
>
atom1
(
LJ14s
);
vector
<
int
>
atom2
(
LJ14s
);
vector
<
float
>
c6
(
LJ14s
);
vector
<
float
>
c12
(
LJ14s
);
vector
<
float
>
q1
(
LJ14s
);
vector
<
float
>
q2
(
LJ14s
);
for
(
int
i
=
0
;
i
<
LJ14s
;
i
++
)
{
int
junk
;
infile
>>
junk
>>
atom1
[
i
]
>>
atom2
[
i
]
>>
c6
[
i
]
>>
c12
[
i
]
>>
q1
[
i
]
>>
q2
[
i
];
}
gpuSetLJ14Parameters
(
gpu
,
epsfac
,
fudge
,
atom1
,
atom2
,
c6
,
c12
,
q1
,
q2
);
return
LJ14s
;
}
else
{
cout
<<
"Error opening Lennard-Jones 1-4 parameter file "
<<
fname
<<
endl
;
exit
(
-
1
);
}
return
0
;
}
extern
"C"
extern
"C"
void
gpuSetLJ14Parameters
(
gpuContext
gpu
,
float
epsfac
,
float
fudge
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
void
gpuSetLJ14Parameters
(
gpuContext
gpu
,
float
epsfac
,
float
fudge
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
c6
,
const
vector
<
float
>&
c12
,
const
vector
<
float
>&
q1
,
const
vector
<
float
>&
q2
)
const
vector
<
float
>&
c6
,
const
vector
<
float
>&
c12
,
const
vector
<
float
>&
q1
,
const
vector
<
float
>&
q2
)
...
@@ -672,158 +370,6 @@ void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const vecto
...
@@ -672,158 +370,6 @@ void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const vecto
psLJ14Parameter
->
Upload
();
psLJ14Parameter
->
Upload
();
}
}
extern
"C"
float
gpuGetAtomicRadius
(
gpuContext
gpu
,
string
s
)
{
for
(
int
i
=
0
;
i
<
gpu
->
gAtomTypes
;
i
++
)
{
if
(
s
==
gpu
->
gpAtomTable
[
i
].
name
)
{
return
gpu
->
gpAtomTable
[
i
].
r
;
}
}
return
0.0
f
;
}
extern
"C"
unsigned
char
gpuGetAtomicSymbol
(
gpuContext
gpu
,
string
s
)
{
for
(
int
i
=
0
;
i
<
gpu
->
gAtomTypes
;
i
++
)
{
if
(
s
==
gpu
->
gpAtomTable
[
i
].
name
)
{
return
gpu
->
gpAtomTable
[
i
].
symbol
;
}
}
return
' '
;
}
extern
"C"
int
gpuReadAtomicParameters
(
gpuContext
gpu
,
char
*
fname
)
{
gpu
->
gAtomTypes
=
0
;
if
(
gpu
->
gpAtomTable
)
delete
[]
gpu
->
gpAtomTable
;
// Read file once to count atom types
ifstream
infile
(
fname
);
if
(
!
infile
.
fail
())
{
char
buff
[
1024
];
int
skips
=
0
;
bool
skipflag
=
true
;
while
(
infile
.
getline
(
buff
,
512
))
{
if
(
buff
[
0
]
==
' '
)
{
skipflag
=
false
;
gpu
->
gAtomTypes
++
;
}
else
if
(
skipflag
)
skips
++
;
}
infile
.
close
();
gpu
->
gpAtomTable
=
new
gpuAtomType
[
gpu
->
gAtomTypes
];
ifstream
infile1
(
fname
);
for
(
int
i
=
0
;
i
<
skips
;
i
++
)
{
infile1
.
getline
(
buff
,
512
);
}
for
(
int
i
=
0
;
i
<
gpu
->
gAtomTypes
;
i
++
)
{
infile1
>>
gpu
->
gpAtomTable
[
i
].
name
>>
gpu
->
gpAtomTable
[
i
].
r
;
infile1
.
getline
(
buff
,
512
);
// Determine symbol
if
(
gpu
->
gpAtomTable
[
i
].
r
<
1.3
f
)
gpu
->
gpAtomTable
[
i
].
symbol
=
'H'
;
else
if
(
gpu
->
gpAtomTable
[
i
].
r
<
1.6
f
)
gpu
->
gpAtomTable
[
i
].
symbol
=
'O'
;
else
if
(
gpu
->
gpAtomTable
[
i
].
r
<
1.7
f
)
gpu
->
gpAtomTable
[
i
].
symbol
=
'N'
;
else
gpu
->
gpAtomTable
[
i
].
symbol
=
'C'
;
#if (DUMP_PARAMETERS == 1)
cout
<<
i
<<
" "
<<
gpu
->
gpAtomTable
[
i
].
name
<<
" "
<<
gpu
->
gpAtomTable
[
i
].
symbol
<<
" "
<<
gpu
->
gpAtomTable
[
i
].
r
<<
endl
;
#endif
}
return
gpu
->
gAtomTypes
;
}
else
{
cout
<<
"Error opening atom parameter file "
<<
fname
<<
endl
;
exit
(
-
1
);
}
return
0
;
}
extern
"C"
int
gpuReadCoulombParameters
(
gpuContext
gpu
,
char
*
fname
)
{
ifstream
infile
(
fname
);
if
(
!
infile
.
fail
())
{
char
buff
[
1024
];
unsigned
int
coulombs
;
float
fudge
=
0.0
f
;
float
epsfac
=
1.0
f
;
infile
>>
coulombs
;
infile
.
get
(
buff
,
9
);
infile
>>
epsfac
;
infile
.
get
(
buff
,
8
);
infile
>>
fudge
;
infile
.
getline
(
buff
,
512
);
vector
<
int
>
atom
(
coulombs
);
vector
<
float
>
c6
(
coulombs
);
vector
<
float
>
c12
(
coulombs
);
vector
<
float
>
q
(
coulombs
);
vector
<
float
>
radius
(
coulombs
);
vector
<
float
>
scale
(
coulombs
);
vector
<
char
>
symbol
(
coulombs
);
vector
<
vector
<
int
>
>
exclusions
(
coulombs
);
unsigned
int
total_exclusions
=
0
;
for
(
unsigned
int
i
=
0
;
i
<
coulombs
;
i
++
)
{
int
junk
,
numExclusions
;
char
atype
[
512
];
infile
>>
junk
>>
c6
[
i
]
>>
c12
[
i
]
>>
q
[
i
]
>>
atype
>>
scale
[
i
]
>>
numExclusions
;
radius
[
i
]
=
gpuGetAtomicRadius
(
gpu
,
atype
);
symbol
[
i
]
=
gpuGetAtomicSymbol
(
gpu
,
atype
);
for
(
int
j
=
0
;
j
<
numExclusions
;
j
++
)
{
int
exclusion
;
infile
>>
exclusion
;
exclusions
[
i
].
push_back
(
exclusion
);
}
}
cout
<<
total_exclusions
<<
" total exclusions.
\n
"
;
gpuSetCoulombParameters
(
gpu
,
epsfac
,
atom
,
c6
,
c12
,
q
,
symbol
,
exclusions
,
NO_CUTOFF
);
gpuSetObcParameters
(
gpu
,
defaultInnerDielectric
,
defaultSolventDielectric
,
atom
,
radius
,
scale
);
return
coulombs
;
}
else
{
cout
<<
"Error opening Coulomb parameter file "
<<
fname
<<
endl
;
exit
(
-
1
);
}
return
0
;
}
extern
"C"
extern
"C"
void
gpuSetCoulombParameters
(
gpuContext
gpu
,
float
epsfac
,
const
vector
<
int
>&
atom
,
const
vector
<
float
>&
c6
,
const
vector
<
float
>&
c12
,
const
vector
<
float
>&
q
,
void
gpuSetCoulombParameters
(
gpuContext
gpu
,
float
epsfac
,
const
vector
<
int
>&
atom
,
const
vector
<
float
>&
c6
,
const
vector
<
float
>&
c12
,
const
vector
<
float
>&
q
,
const
vector
<
char
>&
symbol
,
const
vector
<
vector
<
int
>
>&
exclusions
,
CudaNonbondedMethod
method
)
const
vector
<
char
>&
symbol
,
const
vector
<
vector
<
int
>
>&
exclusions
,
CudaNonbondedMethod
method
)
...
@@ -911,44 +457,6 @@ void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDie
...
@@ -911,44 +457,6 @@ void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDie
gpu
->
sim
.
preFactor
=
2.0
f
*
electricConstant
*
((
1.0
f
/
innerDielectric
)
-
(
1.0
f
/
solventDielectric
))
*
gpu
->
sim
.
forceConversionFactor
;
gpu
->
sim
.
preFactor
=
2.0
f
*
electricConstant
*
((
1.0
f
/
innerDielectric
)
-
(
1.0
f
/
solventDielectric
))
*
gpu
->
sim
.
forceConversionFactor
;
}
}
extern
"C"
int
gpuReadShakeParameters
(
gpuContext
gpu
,
char
*
fname
)
{
ifstream
infile
(
fname
);
if
(
!
infile
.
fail
())
{
char
buff
[
512
];
int
shake_constraints
;
infile
>>
buff
>>
shake_constraints
;
infile
.
getline
(
buff
,
512
);
vector
<
int
>
atom1
(
shake_constraints
);
vector
<
int
>
atom2
(
shake_constraints
);
vector
<
float
>
distance
(
shake_constraints
);
vector
<
float
>
invMass1
(
shake_constraints
);
vector
<
float
>
invMass2
(
shake_constraints
);
for
(
int
i
=
0
;
i
<
shake_constraints
;
i
++
)
{
int
junk
;
infile
>>
junk
>>
atom1
[
i
]
>>
atom2
[
i
]
>>
distance
[
i
]
>>
invMass1
[
i
]
>>
invMass2
[
i
];
}
gpuSetShakeParameters
(
gpu
,
atom1
,
atom2
,
distance
,
invMass1
,
invMass2
,
1e-4
f
);
return
gpu
->
sim
.
ShakeConstraints
;
}
else
{
cout
<<
"Error opening Shake parameter file "
<<
fname
<<
endl
;
exit
(
-
1
);
}
return
0
;
}
extern
"C"
extern
"C"
void
gpuSetShakeParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
distance
,
void
gpuSetShakeParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
distance
,
const
vector
<
float
>&
invMass1
,
const
vector
<
float
>&
invMass2
,
float
tolerance
)
const
vector
<
float
>&
invMass1
,
const
vector
<
float
>&
invMass2
,
float
tolerance
)
...
@@ -1230,45 +738,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
...
@@ -1230,45 +738,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
return
1
;
return
1
;
}
}
extern
"C"
void
gpuReadCoordinates
(
gpuContext
gpu
,
char
*
fname
)
{
ifstream
infile
(
fname
);
gpu
->
natoms
=
0
;
char
buff
[
512
];
infile
>>
buff
>>
gpu
->
natoms
;
infile
.
getline
(
buff
,
511
);
float
totalMass
=
0.0
f
;
gpuAllocateInitialBuffers
(
gpu
);
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
int
junk
;
infile
>>
junk
>>
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
x
>>
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
y
>>
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
z
>>
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
w
>>
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
x
>>
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
y
>>
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
z
>>
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
w
;
gpu
->
psxVector4
->
_pSysStream
[
0
][
i
].
x
=
0.0
f
;
gpu
->
psxVector4
->
_pSysStream
[
0
][
i
].
y
=
0.0
f
;
gpu
->
psxVector4
->
_pSysStream
[
0
][
i
].
z
=
0.0
f
;
gpu
->
psxVector4
->
_pSysStream
[
0
][
i
].
w
=
0.0
f
;
// Accumulate mass
totalMass
+=
1.0
f
/
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
w
;
}
gpu
->
sim
.
inverseTotalMass
=
1.0
f
/
totalMass
;
gpu
->
psPosq4
->
Upload
();
gpu
->
psVelm4
->
Upload
();
gpu
->
psxVector4
->
Upload
();
}
extern
"C"
extern
"C"
void
gpuSetPositions
(
gpuContext
gpu
,
const
vector
<
float
>&
x
,
const
vector
<
float
>&
y
,
const
vector
<
float
>&
z
)
void
gpuSetPositions
(
gpuContext
gpu
,
const
vector
<
float
>&
x
,
const
vector
<
float
>&
y
,
const
vector
<
float
>&
z
)
{
{
...
@@ -1341,37 +810,6 @@ bool gpuIsAvailable()
...
@@ -1341,37 +810,6 @@ bool gpuIsAvailable()
return
(
deviceCount
>
0
);
return
(
deviceCount
>
0
);
}
}
extern
"C"
void
*
gpuInitFromFile
(
char
*
fname
)
{
ifstream
infile
(
fname
);
int
numAtoms
=
0
;
char
buff
[
512
];
infile
>>
buff
>>
numAtoms
;
gpuContext
gpu
=
(
gpuContext
)
gpuInit
(
numAtoms
);
vector
<
float
>
x
(
numAtoms
),
y
(
numAtoms
),
z
(
numAtoms
),
charge
(
numAtoms
),
vx
(
numAtoms
),
vy
(
numAtoms
),
vz
(
numAtoms
),
mass
(
numAtoms
);
infile
.
getline
(
buff
,
511
);
float
totalMass
=
0.0
f
;
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
int
junk
;
infile
>>
junk
>>
x
[
i
]
>>
y
[
i
]
>>
z
[
i
]
>>
charge
[
i
]
>>
vx
[
i
]
>>
vy
[
i
]
>>
vz
[
i
]
>>
mass
[
i
];
mass
[
i
]
=
1.0
f
/
mass
[
i
];
}
gpuSetPositions
(
gpu
,
x
,
y
,
z
);
gpuSetVelocities
(
gpu
,
vx
,
vy
,
vz
);
gpuSetMass
(
gpu
,
mass
);
return
(
void
*
)
gpu
;
}
extern
"C"
extern
"C"
void
*
gpuInit
(
int
numAtoms
)
void
*
gpuInit
(
int
numAtoms
)
{
{
...
@@ -1979,7 +1417,6 @@ int gpuSetConstants(gpuContext gpu)
...
@@ -1979,7 +1417,6 @@ int gpuSetConstants(gpuContext gpu)
SetCalculateCDLJObcGbsaForces1Sim
(
gpu
);
SetCalculateCDLJObcGbsaForces1Sim
(
gpu
);
SetCalculateLocalForcesSim
(
gpu
);
SetCalculateLocalForcesSim
(
gpu
);
SetCalculateObcGbsaBornSumSim
(
gpu
);
SetCalculateObcGbsaBornSumSim
(
gpu
);
SetCalculateObcGbsaForces1Sim
(
gpu
);
SetCalculateObcGbsaForces2Sim
(
gpu
);
SetCalculateObcGbsaForces2Sim
(
gpu
);
SetCalculateAndersenThermostatSim
(
gpu
);
SetCalculateAndersenThermostatSim
(
gpu
);
SetForcesSim
(
gpu
);
SetForcesSim
(
gpu
);
...
@@ -1988,866 +1425,9 @@ int gpuSetConstants(gpuContext gpu)
...
@@ -1988,866 +1425,9 @@ int gpuSetConstants(gpuContext gpu)
SetBrownianUpdateSim
(
gpu
);
SetBrownianUpdateSim
(
gpu
);
SetSettleSim
(
gpu
);
SetSettleSim
(
gpu
);
SetRandomSim
(
gpu
);
SetRandomSim
(
gpu
);
if
(
gpu
->
sm_version
>=
SM_12
)
{
SetCalculateObcGbsaForces1_12Sim
(
gpu
);
}
return
1
;
return
1
;
}
}
extern
"C"
void
gpuDumpCoordinates
(
gpuContext
gpu
)
{
gpu
->
psPosq4
->
Download
();
gpu
->
psVelm4
->
Download
();
(
void
)
printf
(
"
\n\n
Coordinates and velocities
\n
"
);
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
printf
(
"%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f
\n
"
,
i
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
z
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
w
,
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
z
,
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
w
);
}
}
bool
ISNAN
(
float
f
)
{
return
!
(
f
==
f
);
}
extern
"C"
bool
gpuCheckData
(
gpuContext
gpu
)
{
gpu
->
psPosq4
->
Download
();
gpu
->
psVelm4
->
Download
();
gpu
->
psForce4
->
Download
();
gpu
->
psBornForce
->
Download
();
int
violations
=
0
;
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
if
(
ISNAN
(
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
x
)
||
ISNAN
(
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
y
)
||
ISNAN
(
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
z
)
||
ISNAN
(
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
x
)
||
ISNAN
(
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
y
)
||
ISNAN
(
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
z
)
||
ISNAN
(
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
x
)
||
ISNAN
(
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
y
)
||
ISNAN
(
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
z
)
||
ISNAN
(
gpu
->
psBornForce
->
_pSysStream
[
0
][
i
]))
{
printf
(
"%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f
\n
"
,
i
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
z
,
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psVelm4
->
_pSysStream
[
0
][
i
].
z
,
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
z
,
gpu
->
psBornForce
->
_pSysStream
[
0
][
i
]
);
violations
++
;
}
}
if
(
violations
>
0
)
{
printf
(
"%d total violations
\n
"
,
violations
);
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
float
dmin
=
99999999.0
f
;
int
closest
=
-
9999
;
float
x
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
x
;
float
y
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
y
;
float
z
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
z
;
for
(
int
j
=
0
;
j
<
gpu
->
natoms
;
j
++
)
{
if
(
j
!=
i
)
{
float
dx
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
j
].
x
-
x
;
float
dy
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
j
].
y
-
y
;
float
dz
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
j
].
z
-
z
;
float
r
=
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
);
if
(
r
<
dmin
)
{
dmin
=
r
;
closest
=
j
;
}
}
}
printf
(
"Atom %4d: Closest neighbor is Atom %4d, %11.5e
\n
"
,
i
,
closest
,
dmin
);
}
gpuDumpAtomData
(
gpu
);
kClearBornForces
(
gpu
);
kClearForces
(
gpu
);
kCPUCalculateLocalForces
(
gpu
);
// Determine which forces have gone awry
kClearBornForces
(
gpu
);
kClearForces
(
gpu
);
kCalculateCDLJForces
(
gpu
);
kReduceForces
(
gpu
);
printf
(
"Nonbond Forces
\n
"
);
gpuDumpForces
(
gpu
);
kClearBornForces
(
gpu
);
kClearForces
(
gpu
);
kCalculateObcGbsaForces1
(
gpu
);
kReduceObcGbsaBornForces
(
gpu
);
kCalculateObcGbsaForces2
(
gpu
);
kReduceForces
(
gpu
);
printf
(
"OBC Forces
\n
"
);
gpuDumpForces
(
gpu
);
kClearBornForces
(
gpu
);
kClearForces
(
gpu
);
kCalculateLocalForces
(
gpu
);
kReduceForces
(
gpu
);
printf
(
"Local Forces
\n
"
);
gpuDumpForces
(
gpu
);
kClearBornForces
(
gpu
);
kClearForces
(
gpu
);
kReduceForces
(
gpu
);
printf
(
"Cleared Forces
\n
"
);
gpuDumpForces
(
gpu
);
return
false
;
}
return
true
;
}
extern
"C"
void
kCPUCalculate14
(
gpuContext
gpu
)
{
gpu
->
psPosq4
->
Download
();
gpu
->
psForce4
->
Download
();
// gpu->psLJ14ID->Download();
// gpu->psLJ14Parameter->Download();
for
(
int
pos
=
0
;
pos
<
(
int
)
gpu
->
sim
.
LJ14s
;
pos
++
)
{
int4
atom
=
gpu
->
psLJ14ID
->
_pSysStream
[
0
][
pos
];
float4
LJ14
=
gpu
->
psLJ14Parameter
->
_pSysStream
[
0
][
pos
];
float4
a1
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
atom
.
x
];
float4
a2
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
atom
.
y
];
float3
d
;
d
.
x
=
a1
.
x
-
a2
.
x
;
d
.
y
=
a1
.
y
-
a2
.
y
;
d
.
z
=
a1
.
z
-
a2
.
z
;
float
r2
=
d
.
x
*
d
.
x
+
d
.
y
*
d
.
y
+
d
.
z
*
d
.
z
;
float
inverseR
=
1.0
f
/
sqrt
(
r2
);
float
sig2
=
inverseR
*
LJ14
.
y
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
dEdR
=
LJ14
.
x
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
dEdR
+=
LJ14
.
z
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
unsigned
int
offsetA
=
atom
.
x
+
atom
.
z
*
gpu
->
sim
.
stride
;
unsigned
int
offsetB
=
atom
.
y
+
atom
.
w
*
gpu
->
sim
.
stride
;
float4
forceA
=
gpu
->
psForce4
->
_pSysStream
[
0
][
offsetA
];
float4
forceB
=
gpu
->
psForce4
->
_pSysStream
[
0
][
offsetB
];
d
.
x
*=
dEdR
;
d
.
y
*=
dEdR
;
d
.
z
*=
dEdR
;
forceA
.
x
+=
d
.
x
;
forceA
.
y
+=
d
.
y
;
forceA
.
z
+=
d
.
z
;
forceB
.
x
-=
d
.
x
;
forceB
.
y
-=
d
.
y
;
forceB
.
z
-=
d
.
z
;
gpu
->
psForce4
->
_pSysStream
[
0
][
offsetA
]
=
forceA
;
gpu
->
psForce4
->
_pSysStream
[
0
][
offsetB
]
=
forceB
;
printf
(
"%4d: %4d - %4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f
\n
"
,
pos
,
atom
.
x
,
atom
.
y
,
r2
,
dEdR
,
sig2
,
sig6
,
LJ14
.
x
,
LJ14
.
z
);
}
}
extern
"C"
void
gpuDumpPrimeCoordinates
(
gpuContext
gpu
)
{
gpu
->
psPosqP4
->
Download
();
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
printf
(
"%4d: %11.5f %11.5f %11.5f %11.5f
\n
"
,
i
,
gpu
->
psPosqP4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psPosqP4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psPosqP4
->
_pSysStream
[
0
][
i
].
z
,
gpu
->
psPosqP4
->
_pSysStream
[
0
][
i
].
w
);
}
}
extern
"C"
void
gpuDumpForces
(
gpuContext
gpu
)
{
gpu
->
psForce4
->
Download
();
gpu
->
psBornForce
->
Download
();
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
char
buff
[
512
];
sprintf
(
buff
,
"%4d: %11.5f %11.5f %11.5f %11.5f
\n
"
,
i
,
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
z
,
gpu
->
psBornForce
->
_pSysStream
[
0
][
i
]
);
// OutputDebugString(buff);
}
}
extern
"C"
void
gpuDumpAtomData
(
gpuContext
gpu
)
{
gpu
->
psPosq4
->
Download
();
gpu
->
psSigEps2
->
Download
();
gpu
->
psBornRadii
->
Download
();
gpu
->
psObcChain
->
Download
();
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
char
buff
[
512
];
sprintf
(
buff
,
"%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f
\n
"
,
i
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
z
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
w
,
gpu
->
psSigEps2
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psSigEps2
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psBornRadii
->
_pSysStream
[
0
][
i
],
gpu
->
psObcChain
->
_pSysStream
[
0
][
i
]
);
// OutputDebugString((LPCWSTR)buff);
}
}
extern
"C"
void
gpuSetup
(
void
*
pVoid
)
{
gpuContext
gpu
=
(
gpuContext
)
pVoid
;
// Read parameters
cout
<<
gpuReadAtomicParameters
(
gpu
,
"Data/atomicradii.txt"
)
<<
" atom types
\n
"
;
cout
<<
gpuReadBondParameters
(
gpu
,
"Data/GromacsHarmonicBondParameter.txt"
)
<<
" bond parameters.
\n
"
;
cout
<<
gpuReadBondAngleParameters
(
gpu
,
"Data/GromacsAngleBondParameter.txt"
)
<<
" bond angle parameters.
\n
"
;
cout
<<
gpuReadDihedralParameters
(
gpu
,
"Data/GromacsProperDihedralParameter.txt"
)
<<
" proper dihedral parameters.
\n
"
;
cout
<<
gpuReadRbDihedralParameters
(
gpu
,
"Data/GromacsRbDihedralParameter.txt"
)
<<
" Ryckaert-Bellemans dihedral parameters.
\n
"
;
cout
<<
gpuReadLJ14Parameters
(
gpu
,
"Data/GromacsLJ14Parameter.txt"
)
<<
" Lennard-Jones 1-4 parameters.
\n
"
;
cout
<<
gpuReadCoulombParameters
(
gpu
,
"Data/GromacsLJCoulombParameter.txt"
)
<<
" Coulomb parameters.
\n
"
;
cout
<<
gpuReadShakeParameters
(
gpu
,
"Data/GromacsShakeParameters.txt"
)
<<
" shake parameters.
\n
"
;
// Build thread block work list
gpuBuildThreadBlockWorkList
(
gpu
);
// Build exclusion list
gpuBuildExclusionList
(
gpu
);
// Create output buffers
gpuBuildOutputBuffers
(
gpu
);
// Set constant blocks
gpuSetConstants
(
gpu
);
// Initialize randoms
gpuInitializeRandoms
(
gpu
);
// Initialize Born Radii;
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
kClearForces
(
gpu
);
kClearBornForces
(
gpu
);
return
;
}
#define DOT3(v1, v2) (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z)
#define GETNORMEDDOTPRODUCT(v1, v2, dp) \
{
\
dp
=
DOT3
(
v1
,
v2
);
\
float
norm1
=
DOT3
(
v1
,
v1
);
\
float
norm2
=
DOT3
(
v2
,
v2
);
\
dp
/=
sqrt
(
norm1
*
norm2
);
\
dp
=
min
(
dp
,
1.0
f
);
\
dp
=
max
(
dp
,
-
1.0
f
);
\
}
#define CROSS_PRODUCT(v1, v2, c) \
c
.
x
=
v1
.
y
*
v2
.
z
-
v1
.
z
*
v2
.
y
;
\
c
.
y
=
v1
.
z
*
v2
.
x
-
v1
.
x
*
v2
.
z
;
\
c
.
z
=
v1
.
x
*
v2
.
y
-
v1
.
y
*
v2
.
x
;
#define GETPREFACTORSGIVENANGLECOSINE(cosine, param, dEdR) \
{
\
float
angle
=
acos
(
cosine
);
\
float
deltaIdeal
=
angle
-
(
param
.
x
*
(
3.14159265
f
/
180.0
f
));
\
dEdR
=
param
.
y
*
deltaIdeal
;
\
}
#define GETANGLEBETWEENTWOVECTORS(v1, v2, angle) \
{
\
float
dp
;
\
GETNORMEDDOTPRODUCT
(
v1
,
v2
,
dp
);
\
angle
=
acos
(
dp
);
\
}
#define GETANGLECOSINEBETWEENTWOVECTORS(v1, v2, angle, cosine) \
{
\
GETNORMEDDOTPRODUCT
(
v1
,
v2
,
cosine
);
\
angle
=
acos
(
cosine
);
\
}
#define GETDIHEDRALANGLEBETWEENTHREEVECTORS(vector1, vector2, vector3, signVector, cp0, cp1, angle) \
{
\
CROSS_PRODUCT
(
vector1
,
vector2
,
cp0
);
\
CROSS_PRODUCT
(
vector2
,
vector3
,
cp1
);
\
GETANGLEBETWEENTWOVECTORS
(
cp0
,
cp1
,
angle
);
\
float
dp
=
DOT3
(
signVector
,
cp1
);
\
angle
=
(
dp
>=
0
)
?
angle
:
-
angle
;
\
}
#define GETDIHEDRALANGLECOSINEBETWEENTHREEVECTORS(vector1, vector2, vector3, signVector, cp0, cp1, angle, cosine) \
{
\
CROSS_PRODUCT
(
vector1
,
vector2
,
cp0
);
\
CROSS_PRODUCT
(
vector2
,
vector3
,
cp1
);
\
GETANGLECOSINEBETWEENTWOVECTORS
(
cp0
,
cp1
,
angle
,
cosine
);
\
float
dp
=
DOT3
(
signVector
,
cp1
);
\
angle
=
(
dp
>=
0
)
?
angle
:
-
angle
;
\
}
// Calculate Local forces on CPU
extern
"C"
void
kCPUCalculateLocalForces
(
gpuContext
gpu
)
{
gpu
->
psPosq4
->
Download
();
gpu
->
psForce4
->
Download
();
gpu
->
psBondID
->
Download
();
gpu
->
psBondParameter
->
Download
();
gpu
->
psBondAngleID1
->
Download
();
gpu
->
psBondAngleID2
->
Download
();
gpu
->
psBondAngleParameter
->
Download
();
gpu
->
psDihedralID1
->
Download
();
gpu
->
psDihedralID2
->
Download
();
gpu
->
psDihedralParameter
->
Download
();
gpu
->
psRbDihedralID1
->
Download
();
gpu
->
psRbDihedralID2
->
Download
();
gpu
->
psRbDihedralParameter1
->
Download
();
gpu
->
psRbDihedralParameter2
->
Download
();
gpu
->
psLJ14ID
->
Download
();
gpu
->
psLJ14Parameter
->
Download
();
unsigned
int
pos
=
0
;
Vectors
V
;
Vectors
*
A
=
&
V
;
int
violations
=
0
;
while
(
pos
<
gpu
->
sim
.
bond_offset
)
{
if
(
pos
<
gpu
->
sim
.
bonds
)
{
int4
atom
=
gpu
->
psBondID
->
_pSysStream
[
0
][
pos
];
float4
atomA
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
atom
.
x
];
float4
atomB
=
gpu
->
psPosq4
->
_pSysStream
[
0
][
atom
.
y
];
float2
bond
=
gpu
->
psBondParameter
->
_pSysStream
[
0
][
pos
];
float
dx
=
atomB
.
x
-
atomA
.
x
;
float
dy
=
atomB
.
y
-
atomA
.
y
;
float
dz
=
atomB
.
z
-
atomA
.
z
;
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
float
deltaIdeal
=
r
-
bond
.
x
;
float
dEdR
=
bond
.
y
*
deltaIdeal
;
dEdR
=
(
r
>
0.0
f
)
?
(
dEdR
/
r
)
:
0.0
f
;
if
(
fabs
(
deltaIdeal
)
>
1.0
f
)
{
printf
(
"Bond %4d: %11.4f %11.4f %11.4f %11.4f %11.4f %11.4f
\n
"
,
pos
,
dx
,
dy
,
dz
,
r
,
deltaIdeal
,
dEdR
);
violations
++
;
}
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
unsigned
int
offsetA
=
atom
.
x
+
atom
.
z
*
gpu
->
sim
.
stride
;
unsigned
int
offsetB
=
atom
.
y
+
atom
.
w
*
gpu
->
sim
.
stride
;
float4
forceA
=
gpu
->
psForce4
->
_pSysStream
[
0
][
offsetA
];
float4
forceB
=
gpu
->
psForce4
->
_pSysStream
[
0
][
offsetB
];
forceA
.
x
+=
dx
;
forceA
.
y
+=
dy
;
forceA
.
z
+=
dz
;
forceB
.
x
-=
dx
;
forceB
.
y
-=
dy
;
forceB
.
z
-=
dz
;
gpu
->
psForce4
->
_pSysStream
[
0
][
offsetA
]
=
forceA
;
gpu
->
psForce4
->
_pSysStream
[
0
][
offsetB
]
=
forceB
;
}
pos
++
;
}
#if 0
while (pos < gpu->sim.bond_angle_offset)
{
unsigned int pos1 = pos - gpu->sim.bond_offset;
if (pos1 < gpu->sim.bond_angles)
{
int4 atom1 = gpu->psBondAngleID1->_pSysStream[0][pos1];
float2 bond_angle = gpu->psBondAngleParameter->_pSysStream[0][pos1];
float4 a1 = gpu->psPosq4->_pSysStream[0][atom1.x];
float4 a2 = gpu->psPosq4->_pSysStream[0][atom1.y];
float4 a3 = gpu->psPosq4->_pSysStream[0][atom1.z];
A->v0.x = a2.x - a1.x;
A->v0.y = a2.y - a1.y;
A->v0.z = a2.z - a1.z;
A->v1.x = a2.x - a3.x;
A->v1.y = a2.y - a3.y;
A->v1.z = a2.z - a3.z;
float3 cp;
CROSS_PRODUCT(A->v0, A->v1, cp);
float rp = DOT3(cp, cp); //cx * cx + cy * cy + cz * cz;
rp = max(sqrt(rp), 1.0e-06f);
float r21 = DOT3(A->v0, A->v0); // dx1 * dx1 + dy1 * dy1 + dz1 * dz1;
float r23 = DOT3(A->v1, A->v1); // dx2 * dx2 + dy2 * dy2 + dz2 * dz2;
float dot = DOT3(A->v0, A->v1); // dx1 * dx2 + dy1 * dy2 + dz1 * dz2;
float cosine = dot / sqrt(r21 * r23);
float dEdR;
GETPREFACTORSGIVENANGLECOSINE(cosine, bond_angle, dEdR);
printf("Bond angle %4d %11.4f %11.4f\n", pos1, cosine, dEdR);
float termA = dEdR / (r21 * rp);
float termC = -dEdR / (r23 * rp);
float3 c21;
float3 c23;
CROSS_PRODUCT(A->v0, cp, c21);
CROSS_PRODUCT(A->v1, cp, c23);
c21.x *= termA;
c21.y *= termA;
c21.z *= termA;
c23.x *= termC;
c23.y *= termC;
c23.z *= termC;
int2 atom2 = gpu->psBondAngleID2->_pSysStream[0][pos1];
unsigned int offset = atom1.x + atom1.w * gpu->sim.stride;
float4 force = gpu->psForce4->_pSysStream[0][offset];
force.x += c21.x;
force.y += c21.y;
force.z += c21.z;
gpu->psForce4->_pSysStream[0][offset] = force;
offset = atom1.y + atom2.x * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x -= (c21.x + c23.x);
force.y -= (c21.y + c23.y);
force.z -= (c21.z + c23.z);
gpu->psForce4->_pSysStream[0][offset] = force;
offset = atom1.z + atom2.y * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += c23.x;
force.y += c23.y;
force.z += c23.z;
gpu->psForce4->_pSysStream[0][offset] = force;
}
pos++;
}
while (pos < gpu->sim.dihedral_offset)
{
unsigned int pos1 = pos - gpu->sim.bond_angle_offset;
if (pos1 < gpu->sim.dihedrals)
{
int4 atom1 = gpu->psDihedralID1->_pSysStream[0][pos1];
float4 atomA = gpu->psPosq4->_pSysStream[0][atom1.x];
float4 atomB = gpu->psPosq4->_pSysStream[0][atom1.y];
float4 atomC = gpu->psPosq4->_pSysStream[0][atom1.z];
float4 atomD = gpu->psPosq4->_pSysStream[0][atom1.w];
A->v0.x = atomA.x - atomB.x;
A->v0.y = atomA.y - atomB.y;
A->v0.z = atomA.z - atomB.z;
A->v1.x = atomC.x - atomB.x;
A->v1.y = atomC.y - atomB.y;
A->v1.z = atomC.z - atomB.z;
A->v2.x = atomC.x - atomD.x;
A->v2.y = atomC.y - atomD.y;
A->v2.z = atomC.z - atomD.z;
float3 cp0, cp1;
float dihedralAngle;
GETDIHEDRALANGLEBETWEENTHREEVECTORS(A->v0, A->v1, A->v2, A->v0, cp0, cp1, dihedralAngle);
float4 dihedral = gpu->psDihedralParameter->_pSysStream[0][pos1];
float deltaAngle = dihedral.z * dihedralAngle - (dihedral.y * 3.14159265f / 180.0f);
float sinDeltaAngle = sin(deltaAngle);
float dEdAngle = -dihedral.x * dihedral.z * sinDeltaAngle;
float normCross1 = DOT3(cp0, cp0);
float normBC = sqrt(DOT3(A->v1, A->v1));
float4 ff;
ff.x = (-dEdAngle * normBC) / normCross1;
float normCross2 = DOT3(cp1, cp1);
ff.w = (dEdAngle * normBC) / normCross2;
float dp = 1.0f / DOT3(A->v1, A->v1);
ff.y = DOT3(A->v0, A->v1) * dp;
ff.z = DOT3(A->v2, A->v1) * dp;
int4 atom2 = gpu->psDihedralID2->_pSysStream[0][pos1];
float3 internalF0;
float3 internalF3;
float3 s;
// printf("%4d: %9.4f %9.4f %9.4f %9.4f\n", pos1, ff.x, ff.y, ff.z, ff.w);
unsigned int offset = atom1.x + atom2.x * gpu->sim.stride;
float4 force = gpu->psForce4->_pSysStream[0][offset];
internalF0.x = ff.x * cp0.x;
force.x += internalF0.x;
internalF0.y = ff.x * cp0.y;
force.y += internalF0.y;
internalF0.z = ff.x * cp0.z;
force.z += internalF0.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("Dihedral %4d - 0: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
offset = atom1.w + atom2.w * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
internalF3.x = ff.w * cp1.x;
force.x += internalF3.x;
internalF3.y = ff.w * cp1.y;
force.y += internalF3.y;
internalF3.z = ff.w * cp1.z;
force.z += internalF3.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("Dihedral %4d - 3: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
s.x = ff.y * internalF0.x - ff.z * internalF3.x;
s.y = ff.y * internalF0.y - ff.z * internalF3.y;
s.z = ff.y * internalF0.z - ff.z * internalF3.z;
offset = atom1.y + atom2.y * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += -internalF0.x + s.x;
force.y += -internalF0.y + s.y;
force.z += -internalF0.z + s.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("Dihedral %4d - 1: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
offset = atom1.z + atom2.z * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += -internalF3.x - s.x;
force.y += -internalF3.y - s.y;
force.z += -internalF3.z - s.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("Dihedral %4d - 2: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
}
pos++;
}
while (pos < gpu->sim.rb_dihedral_offset)
{
unsigned int pos1 = pos - gpu->sim.dihedral_offset;
if (pos1 < gpu->sim.rb_dihedrals)
{
int4 atom1 = gpu->psRbDihedralID1->_pSysStream[0][pos1];
float4 atomA = gpu->psPosq4->_pSysStream[0][atom1.x];
float4 atomB = gpu->psPosq4->_pSysStream[0][atom1.y];
float4 atomC = gpu->psPosq4->_pSysStream[0][atom1.z];
float4 atomD = gpu->psPosq4->_pSysStream[0][atom1.w];
A->v0.x = atomA.x - atomB.x;
A->v0.y = atomA.y - atomB.y;
A->v0.z = atomA.z - atomB.z;
A->v1.x = atomC.x - atomB.x;
A->v1.y = atomC.y - atomB.y;
A->v1.z = atomC.z - atomB.z;
A->v2.x = atomC.x - atomD.x;
A->v2.y = atomC.y - atomD.y;
A->v2.z = atomC.z - atomD.z;
float3 cp0, cp1;
float dihedralAngle, cosPhi;
// printf("%4d - 0 : %9.4f %9.4f %9.4f\n", pos1, A->v0.x, A->v0.y, A->v0.z);
// printf("%4d - 1 : %9.4f %9.4f %9.4f\n", pos1, A->v1.x, A->v1.y, A->v1.z);
// printf("%4d - 2 : %9.4f %9.4f %9.4f\n", pos1, A->v2.x, A->v2.y, A->v2.z);
GETDIHEDRALANGLECOSINEBETWEENTHREEVECTORS(A->v0, A->v1, A->v2, A->v0, cp0, cp1, dihedralAngle, cosPhi);
if (dihedralAngle < 0.0f )
{
dihedralAngle += 3.14159265f;
}
else
{
dihedralAngle -= 3.14159265f;
}
cosPhi = -cosPhi;
// printf("%4d: %9.4f %9.4f\n", pos1, dihedralAngle, cosPhi);
float4 dihedral1 = gpu->psRbDihedralParameter1->_pSysStream[0][pos1];
float2 dihedral2 = gpu->psRbDihedralParameter2->_pSysStream[0][pos1];
float cosFactor = cosPhi;
float dEdAngle = -dihedral1.y;
// printf("%4d - 1: %9.4f %9.4f\n", pos1, dEdAngle, 1.0f);
dEdAngle -= 2.0f * dihedral1.z * cosFactor;
// printf("%4d - 2: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
cosFactor *= cosPhi;
dEdAngle -= 3.0f * dihedral1.w * cosFactor;
// printf("%4d - 3: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
cosFactor *= cosPhi;
dEdAngle -= 4.0f * dihedral2.x * cosFactor;
// printf("%4d - 4: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
cosFactor *= cosPhi;
dEdAngle -= 5.0f * dihedral2.y * cosFactor;
// printf("%4d - 5: %9.4f %9.4f\n", pos1, dEdAngle, cosFactor);
dEdAngle *= sin(dihedralAngle);
// printf("%4d - f: %9.4f\n", pos1, dEdAngle);
float normCross1 = DOT3(cp0, cp0);
float normBC = sqrt(DOT3(A->v1, A->v1));
float4 ff;
ff.x = (-dEdAngle * normBC) / normCross1;
float normCross2 = DOT3(cp1, cp1);
ff.w = (dEdAngle * normBC) / normCross2;
float dp = 1.0f / DOT3(A->v1, A->v1);
ff.y = DOT3(A->v0, A->v1) * dp;
ff.z = DOT3(A->v2, A->v1) * dp;
int4 atom2 = gpu->psRbDihedralID2->_pSysStream[0][pos1];
float3 internalF0;
float3 internalF3;
float3 s;
printf("RB Dihedral %4d: %9.4f %9.4f %9.4f %9.4f\n", pos1, ff.x, ff.y, ff.z, ff.w);
unsigned int offset = atom1.x + atom2.x * gpu->sim.stride;
float4 force = gpu->psForce4->_pSysStream[0][offset];
internalF0.x = ff.x * cp0.x;
force.x += internalF0.x;
internalF0.y = ff.x * cp0.y;
force.y += internalF0.y;
internalF0.z = ff.x * cp0.z;
force.z += internalF0.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("RB Dihedral %4d - 0: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
offset = atom1.w + atom2.w * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
internalF3.x = ff.w * cp1.x;
force.x += internalF3.x;
internalF3.y = ff.w * cp1.y;
force.y += internalF3.y;
internalF3.z = ff.w * cp1.z;
force.z += internalF3.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("RB Dihedral %4d - 3: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
s.x = ff.y * internalF0.x - ff.z * internalF3.x;
s.y = ff.y * internalF0.y - ff.z * internalF3.y;
s.z = ff.y * internalF0.z - ff.z * internalF3.z;
offset = atom1.y + atom2.y * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += -internalF0.x + s.x;
force.y += -internalF0.y + s.y;
force.z += -internalF0.z + s.z;
gpu->psForce4->_pSysStream[0][offset] = force;
printf("RB Dihedral %4d - 1: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
offset = atom1.z + atom2.z * gpu->sim.stride;
force = gpu->psForce4->_pSysStream[0][offset];
force.x += -internalF3.x - s.x;
force.y += -internalF3.y - s.y;
force.z += -internalF3.z - s.z;
gpu->psForce4->_pSysStream[0][offset] = force;
// printf("%4d - 2: %9.4f %9.4f %9.4f\n", pos1, gpu->psForce4->_pSysStream[0][offset], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride], gpu->psForce4->_pSysStream[0][offset + gpu->sim.stride2]);
}
pos++;
}
while (pos < gpu->sim.LJ14_offset)
{
unsigned int pos1 = pos - gpu->sim.rb_dihedral_offset;
if (pos1 < gpu->sim.LJ14s)
{
int4 atom = gpu->psLJ14ID->_pSysStream[0][pos1];
float4 LJ14 = gpu->psLJ14Parameter->_pSysStream[0][pos1];
float4 a1 = gpu->psPosq4->_pSysStream[0][atom.x];
float4 a2 = gpu->psPosq4->_pSysStream[0][atom.y];
float3 d;
d.x = a1.x - a2.x;
d.y = a1.y - a2.y;
d.z = a1.z - a2.z;
float r2 = DOT3(d, d);
float inverseR = 1.0f / sqrt(r2);
float sig2 = inverseR * LJ14.y;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = LJ14.x * (12.0f * sig6 - 6.0f) * sig6;
dEdR += LJ14.z * inverseR;
dEdR *= inverseR * inverseR;
unsigned int offsetA = atom.x + atom.z * gpu->sim.stride;
unsigned int offsetB = atom.y + atom.w * gpu->sim.stride;
float4 forceA = gpu->psForce4->_pSysStream[0][offsetA];
float4 forceB = gpu->psForce4->_pSysStream[0][offsetB];
d.x *= dEdR;
d.y *= dEdR;
d.z *= dEdR;
forceA.x += d.x;
forceA.y += d.y;
forceA.z += d.z;
forceB.x -= d.x;
forceB.y -= d.y;
forceB.z -= d.z;
printf("LJ14 %d: %11.4f %11.4f %11.4f\n", pos1, d.x, d.y, d.z);
gpu->psForce4->_pSysStream[0][offsetA] = forceA;
gpu->psForce4->_pSysStream[0][offsetB] = forceB;
}
pos++;
}
#endif
if
(
violations
>
0
)
{
gpuDumpCoordinates
(
gpu
);
gpuDumpForces
(
gpu
);
}
}
static
FILE
*
getWriteToFilePtr
(
char
*
fname
,
int
step
)
{
std
::
stringstream
fileName
;
fileName
<<
fname
<<
"_"
;
fileName
<<
step
;
fileName
<<
".txt"
;
FILE
*
filePtr
=
fopen
(
fileName
.
str
().
c_str
(),
"w"
);
if
(
filePtr
==
NULL
){
(
void
)
fprintf
(
stderr
,
"Could not open file=<%s> for writitng."
,
fileName
.
str
().
c_str
()
);
exit
(
-
1
);
}
return
filePtr
;
}
extern
"C"
{
static
void
printValues
(
FILE
*
filePtr
,
int
index
,
int
numberOfValues
,
float
*
values
)
{
int
i
;
(
void
)
fprintf
(
filePtr
,
"%5d "
,
index
);
for
(
i
=
0
;
i
<
numberOfValues
;
i
++
)
{
(
void
)
fprintf
(
filePtr
,
" %18.10e"
,
values
[
i
]
);
}
(
void
)
fprintf
(
filePtr
,
"
\n
"
);
(
void
)
fflush
(
filePtr
);
}
}
extern
"C"
void
WriteArrayToFile1
(
gpuContext
gpu
,
char
*
fname
,
int
step
,
CUDAStream
<
float
>*
psPos
,
int
numPrint
)
{
int
i
;
static
const
int
numberOfValues
=
1
;
FILE
*
filePtr
=
getWriteToFilePtr
(
fname
,
step
);
float
values
[
numberOfValues
];
psPos
->
Download
();
numPrint
=
(
numPrint
>
0
&&
(
numPrint
<
gpu
->
natoms
))
?
numPrint
:
gpu
->
natoms
;
for
(
i
=
0
;
i
<
numPrint
;
i
++
)
{
values
[
0
]
=
psPos
->
_pSysStream
[
0
][
i
];
printValues
(
filePtr
,
i
,
numberOfValues
,
values
);
}
for
(
i
=
gpu
->
natoms
-
numPrint
;
i
<
gpu
->
natoms
;
i
++
)
{
values
[
0
]
=
psPos
->
_pSysStream
[
0
][
i
];
printValues
(
filePtr
,
i
,
numberOfValues
,
values
);
}
(
void
)
fclose
(
filePtr
);
}
extern
"C"
void
WriteArrayToFile2
(
gpuContext
gpu
,
char
*
fname
,
int
step
,
CUDAStream
<
float2
>*
psPos
,
int
numPrint
)
{
int
i
;
static
const
int
numberOfValues
=
2
;
FILE
*
filePtr
=
getWriteToFilePtr
(
fname
,
step
);
float
values
[
numberOfValues
];
psPos
->
Download
();
numPrint
=
(
numPrint
>
0
&&
(
numPrint
<
gpu
->
natoms
))
?
numPrint
:
gpu
->
natoms
;
for
(
i
=
0
;
i
<
numPrint
;
i
++
)
{
values
[
0
]
=
psPos
->
_pSysStream
[
0
][
i
].
x
;
values
[
1
]
=
psPos
->
_pSysStream
[
0
][
i
].
y
;
printValues
(
filePtr
,
i
,
numberOfValues
,
values
);
}
for
(
i
=
gpu
->
natoms
-
numPrint
;
i
<
gpu
->
natoms
;
i
++
)
{
values
[
0
]
=
psPos
->
_pSysStream
[
0
][
i
].
x
;
values
[
1
]
=
psPos
->
_pSysStream
[
0
][
i
].
y
;
printValues
(
filePtr
,
i
,
numberOfValues
,
values
);
}
(
void
)
fclose
(
filePtr
);
}
extern
"C"
void
WriteArrayToFile4
(
gpuContext
gpu
,
char
*
fname
,
int
step
,
CUDAStream
<
float4
>*
psPos
,
int
numPrint
)
{
int
i
;
static
const
int
numberOfValues
=
4
;
FILE
*
filePtr
=
getWriteToFilePtr
(
fname
,
step
);
float
values
[
numberOfValues
];
psPos
->
Download
();
numPrint
=
(
numPrint
>
0
&&
(
numPrint
<
gpu
->
natoms
))
?
numPrint
:
gpu
->
natoms
;
for
(
i
=
0
;
i
<
numPrint
;
i
++
)
{
values
[
0
]
=
psPos
->
_pSysStream
[
0
][
i
].
x
;
values
[
1
]
=
psPos
->
_pSysStream
[
0
][
i
].
y
;
values
[
2
]
=
psPos
->
_pSysStream
[
0
][
i
].
z
;
values
[
3
]
=
psPos
->
_pSysStream
[
0
][
i
].
w
;
printValues
(
filePtr
,
i
,
numberOfValues
,
values
);
}
for
(
i
=
gpu
->
natoms
-
numPrint
;
i
<
gpu
->
natoms
;
i
++
)
{
values
[
0
]
=
psPos
->
_pSysStream
[
0
][
i
].
x
;
values
[
1
]
=
psPos
->
_pSysStream
[
0
][
i
].
y
;
values
[
2
]
=
psPos
->
_pSysStream
[
0
][
i
].
z
;
values
[
3
]
=
psPos
->
_pSysStream
[
0
][
i
].
w
;
printValues
(
filePtr
,
i
,
numberOfValues
,
values
);
}
(
void
)
fclose
(
filePtr
);
}
extern
"C"
void
gpuDumpObcInfo
(
gpuContext
gpu
)
{
gpu
->
psPosq4
->
Download
();
gpu
->
psBornRadii
->
Download
();
gpu
->
psObcData
->
Download
();
gpu
->
psBornSum
->
Download
();
printf
(
"
\n\n
Obc Info xyzw Brad atomR scaledAtomR
\n
"
);
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
printf
(
"%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f
\n
"
,
i
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
z
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
i
].
w
,
gpu
->
psBornRadii
->
_pSysStream
[
0
][
i
],
gpu
->
psBornSum
->
_pSysStream
[
0
][
i
],
gpu
->
psObcData
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psObcData
->
_pSysStream
[
0
][
i
].
y
);
}
}
extern
"C"
void
gpuDumpObcLoop1
(
gpuContext
gpu
)
{
float
compF
;
gpu
->
psForce4
->
Download
();
gpu
->
psBornRadii
->
Download
();
gpu
->
psBornForce
->
Download
();
gpu
->
psObcChain
->
Download
();
gpu
->
psBornSum
->
Download
();
printf
(
"
\n\n
Obc F3 BrnR BrnF Chn
\n
"
);
for
(
int
i
=
0
;
i
<
gpu
->
natoms
;
i
++
)
{
compF
=
gpu
->
psBornForce
->
_pSysStream
[
0
][
i
]
/
(
gpu
->
psBornRadii
->
_pSysStream
[
0
][
i
]
*
gpu
->
psBornRadii
->
_pSysStream
[
0
][
i
]
*
gpu
->
psObcChain
->
_pSysStream
[
0
][
i
]);
printf
(
"%4d: %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f %11.5f
\n
"
,
i
,
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
x
,
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
y
,
gpu
->
psForce4
->
_pSysStream
[
0
][
i
].
z
,
// gpu->psForce4->_pSysStream[0][i].w,
gpu
->
psBornRadii
->
_pSysStream
[
0
][
i
],
compF
,
gpu
->
psBornForce
->
_pSysStream
[
0
][
i
],
// gpu->psBornSum->_pSysStream[0][i],
gpu
->
psObcChain
->
_pSysStream
[
0
][
i
]
);
}
}
static
void
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
static
void
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
{
// Recursively tag atoms as belonging to a particular molecule.
// Recursively tag atoms as belonging to a particular molecule.
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
ff0a69f9
...
@@ -138,52 +138,25 @@ typedef struct _gpuContext *gpuContext;
...
@@ -138,52 +138,25 @@ typedef struct _gpuContext *gpuContext;
extern
"C"
extern
"C"
bool
gpuIsAvailable
();
bool
gpuIsAvailable
();
extern
"C"
int
gpuReadBondParameters
(
gpuContext
gpu
,
char
*
fname
);
extern
"C"
extern
"C"
void
gpuSetBondParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
float
>&
length
,
const
std
::
vector
<
float
>&
k
);
void
gpuSetBondParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
float
>&
length
,
const
std
::
vector
<
float
>&
k
);
extern
"C"
int
gpuReadBondAngleParameters
(
gpuContext
gpu
,
char
*
fname
);
extern
"C"
extern
"C"
void
gpuSetBondAngleParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
int
>&
atom3
,
void
gpuSetBondAngleParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
int
>&
atom3
,
const
std
::
vector
<
float
>&
angle
,
const
std
::
vector
<
float
>&
k
);
const
std
::
vector
<
float
>&
angle
,
const
std
::
vector
<
float
>&
k
);
extern
"C"
int
gpuReadDihedralParameters
(
gpuContext
gpu
,
char
*
fname
);
extern
"C"
extern
"C"
void
gpuSetDihedralParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
int
>&
atom3
,
const
std
::
vector
<
int
>&
atom4
,
void
gpuSetDihedralParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
int
>&
atom3
,
const
std
::
vector
<
int
>&
atom4
,
const
std
::
vector
<
float
>&
k
,
const
std
::
vector
<
float
>&
phase
,
const
std
::
vector
<
int
>&
periodicity
);
const
std
::
vector
<
float
>&
k
,
const
std
::
vector
<
float
>&
phase
,
const
std
::
vector
<
int
>&
periodicity
);
extern
"C"
int
gpuReadRbDihedralParameters
(
gpuContext
gpu
,
char
*
fname
);
extern
"C"
extern
"C"
void
gpuSetRbDihedralParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
int
>&
atom3
,
const
std
::
vector
<
int
>&
atom4
,
void
gpuSetRbDihedralParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
int
>&
atom3
,
const
std
::
vector
<
int
>&
atom4
,
const
std
::
vector
<
float
>&
c0
,
const
std
::
vector
<
float
>&
c1
,
const
std
::
vector
<
float
>&
c2
,
const
std
::
vector
<
float
>&
c3
,
const
std
::
vector
<
float
>&
c4
,
const
std
::
vector
<
float
>&
c5
);
const
std
::
vector
<
float
>&
c0
,
const
std
::
vector
<
float
>&
c1
,
const
std
::
vector
<
float
>&
c2
,
const
std
::
vector
<
float
>&
c3
,
const
std
::
vector
<
float
>&
c4
,
const
std
::
vector
<
float
>&
c5
);
extern
"C"
int
gpuReadLJ14Parameters
(
gpuContext
gpu
,
char
*
fname
);
extern
"C"
extern
"C"
void
gpuSetLJ14Parameters
(
gpuContext
gpu
,
float
epsfac
,
float
fudge
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
void
gpuSetLJ14Parameters
(
gpuContext
gpu
,
float
epsfac
,
float
fudge
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
float
>&
c6
,
const
std
::
vector
<
float
>&
c12
,
const
std
::
vector
<
float
>&
q1
,
const
std
::
vector
<
float
>&
q2
);
const
std
::
vector
<
float
>&
c6
,
const
std
::
vector
<
float
>&
c12
,
const
std
::
vector
<
float
>&
q1
,
const
std
::
vector
<
float
>&
q2
);
extern
"C"
float
gpuGetAtomicRadius
(
gpuContext
gpu
,
std
::
string
s
);
extern
"C"
unsigned
char
gpuGetAtomicSymbol
(
gpuContext
gpu
,
std
::
string
s
);
extern
"C"
int
gpuReadAtomicParameters
(
gpuContext
gpu
,
char
*
fname
);
extern
"C"
int
gpuReadCoulombParameters
(
gpuContext
gpu
,
char
*
fname
);
extern
"C"
extern
"C"
void
gpuSetCoulombParameters
(
gpuContext
gpu
,
float
epsfac
,
const
std
::
vector
<
int
>&
atom
,
const
std
::
vector
<
float
>&
c6
,
const
std
::
vector
<
float
>&
c12
,
const
std
::
vector
<
float
>&
q
,
void
gpuSetCoulombParameters
(
gpuContext
gpu
,
float
epsfac
,
const
std
::
vector
<
int
>&
atom
,
const
std
::
vector
<
float
>&
c6
,
const
std
::
vector
<
float
>&
c12
,
const
std
::
vector
<
float
>&
q
,
const
std
::
vector
<
char
>&
symbol
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
exclusions
,
CudaNonbondedMethod
method
);
const
std
::
vector
<
char
>&
symbol
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
exclusions
,
CudaNonbondedMethod
method
);
...
@@ -197,9 +170,6 @@ void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize
...
@@ -197,9 +170,6 @@ void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize
extern
"C"
extern
"C"
void
gpuSetObcParameters
(
gpuContext
gpu
,
float
innerDielectric
,
float
solventDielectric
,
const
std
::
vector
<
int
>&
atom
,
const
std
::
vector
<
float
>&
radius
,
const
std
::
vector
<
float
>&
scale
);
void
gpuSetObcParameters
(
gpuContext
gpu
,
float
innerDielectric
,
float
solventDielectric
,
const
std
::
vector
<
int
>&
atom
,
const
std
::
vector
<
float
>&
radius
,
const
std
::
vector
<
float
>&
scale
);
extern
"C"
int
gpuReadShakeParameters
(
gpuContext
gpu
,
char
*
fname
);
extern
"C"
extern
"C"
void
gpuSetShakeParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
float
>&
distance
,
void
gpuSetShakeParameters
(
gpuContext
gpu
,
const
std
::
vector
<
int
>&
atom1
,
const
std
::
vector
<
int
>&
atom2
,
const
std
::
vector
<
float
>&
distance
,
const
std
::
vector
<
float
>&
invMass1
,
const
std
::
vector
<
float
>&
invMass2
,
float
tolerance
);
const
std
::
vector
<
float
>&
invMass1
,
const
std
::
vector
<
float
>&
invMass2
,
float
tolerance
);
...
@@ -207,9 +177,6 @@ void gpuSetShakeParameters(gpuContext gpu, const std::vector<int>& atom1, const
...
@@ -207,9 +177,6 @@ void gpuSetShakeParameters(gpuContext gpu, const std::vector<int>& atom1, const
extern
"C"
extern
"C"
int
gpuAllocateInitialBuffers
(
gpuContext
gpu
);
int
gpuAllocateInitialBuffers
(
gpuContext
gpu
);
extern
"C"
void
gpuReadCoordinates
(
gpuContext
gpu
,
char
*
fname
);
extern
"C"
extern
"C"
void
gpuSetPositions
(
gpuContext
gpu
,
const
std
::
vector
<
float
>&
x
,
const
std
::
vector
<
float
>&
y
,
const
std
::
vector
<
float
>&
z
);
void
gpuSetPositions
(
gpuContext
gpu
,
const
std
::
vector
<
float
>&
x
,
const
std
::
vector
<
float
>&
y
,
const
std
::
vector
<
float
>&
z
);
...
@@ -222,9 +189,6 @@ void gpuSetMass(gpuContext gpu, const std::vector<float>& mass);
...
@@ -222,9 +189,6 @@ void gpuSetMass(gpuContext gpu, const std::vector<float>& mass);
extern
"C"
extern
"C"
void
gpuInitializeRandoms
(
gpuContext
gpu
);
void
gpuInitializeRandoms
(
gpuContext
gpu
);
extern
"C"
void
*
gpuInitFromFile
(
char
*
fname
);
extern
"C"
extern
"C"
void
*
gpuInit
(
int
numAtoms
);
void
*
gpuInit
(
int
numAtoms
);
...
@@ -255,48 +219,6 @@ void gpuBuildExclusionList(gpuContext gpu);
...
@@ -255,48 +219,6 @@ void gpuBuildExclusionList(gpuContext gpu);
extern
"C"
extern
"C"
int
gpuSetConstants
(
gpuContext
gpu
);
int
gpuSetConstants
(
gpuContext
gpu
);
extern
"C"
void
gpuDumpCoordinates
(
gpuContext
gpu
);
extern
"C"
void
gpuDumpPrimeCoordinates
(
gpuContext
gpu
);
extern
"C"
void
gpuDumpForces
(
gpuContext
gpu
);
extern
"C"
void
gpuDumpAtomData
(
gpuContext
gpu
);
extern
"C"
bool
gpuCheckData
(
gpuContext
gpu
);
extern
"C"
void
gpuSetup
(
void
*
pVoid
);
extern
"C"
void
kCPUCalculate14
(
gpuContext
gpu
);
extern
"C"
void
kCPUCalculateLocalForces
(
gpuContext
gpu
);
extern
"C"
void
WriteArrayToFile1
(
gpuContext
gpu
,
char
*
fname
,
int
step
,
CUDAStream
<
float
>*
psPos
,
int
numPrint
);
extern
"C"
void
WriteArrayToFile2
(
gpuContext
gpu
,
char
*
fname
,
int
step
,
CUDAStream
<
float2
>*
psPos
,
int
numPrint
);
extern
"C"
void
WriteArrayToFile3
(
gpuContext
gpu
,
char
*
fname
,
int
step
,
CUDAStream
<
float3
>*
psPos
,
int
numPrint
);
extern
"C"
void
WriteArrayToFile4
(
gpuContext
gpu
,
char
*
fname
,
int
step
,
CUDAStream
<
float4
>*
psPos
,
int
numPrint
);
extern
"C"
void
gpuDumpObcInfo
(
gpuContext
gpu
);
extern
"C"
void
gpuDumpObcLoop1
(
gpuContext
gpu
);
extern
"C"
extern
"C"
void
gpuReorderAtoms
(
gpuContext
gpu
);
void
gpuReorderAtoms
(
gpuContext
gpu
);
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
View file @
ff0a69f9
...
@@ -152,18 +152,6 @@ void kReduceObcGbsaBornSum(gpuContext gpu)
...
@@ -152,18 +152,6 @@ void kReduceObcGbsaBornSum(gpuContext gpu)
// printf("kReduceObcGbsaBornSum\n");
// printf("kReduceObcGbsaBornSum\n");
kReduceObcGbsaBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
kReduceObcGbsaBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
gpu
->
bRecalculateBornRadii
=
false
;
gpu
->
bRecalculateBornRadii
=
false
;
if
(
0
){
static
int
step
=
0
;
int
numPrint
=
-
1
;
step
++
;
WriteArrayToFile1
(
gpu
,
"ObcGbsaBornBRad"
,
step
,
gpu
->
psBornRadii
,
numPrint
);
WriteArrayToFile1
(
gpu
,
"ObcGbsaBornSum"
,
step
,
gpu
->
psBornSum
,
numPrint
);
WriteArrayToFile2
(
gpu
,
"ObcGbsaObcData"
,
step
,
gpu
->
psObcData
,
numPrint
);
WriteArrayToFile4
(
gpu
,
"ObcGbsaBornPos"
,
step
,
gpu
->
psPosq4
,
numPrint
);
//gpuDumpCoordinates( gpu );
gpuDumpObcInfo
(
gpu
);
}
LAUNCHERROR
(
"kReduceObcGbsaBornSum"
);
LAUNCHERROR
(
"kReduceObcGbsaBornSum"
);
}
}
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaForces1.cu
deleted
100755 → 0
View file @
a56ce87d
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
#include "gputypes.h"
struct
Atom
{
float
x
;
float
y
;
float
z
;
float
q
;
float
br
;
float
fx
;
float
fy
;
float
fz
;
float
fb
;
};
__shared__
Atom
sA
[
G8X_NONBOND_THREADS_PER_BLOCK
];
__shared__
unsigned
int
sWorkUnit
[
G8X_NONBOND_WORKUNITS_PER_SM
];
__shared__
unsigned
int
sNext
[
GRID
];
static
__constant__
cudaGmxSimulation
cSim
;
void
SetCalculateObcGbsaForces1Sim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetCalculateObcGbsaForces1Sim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
__global__
void
kReduceObcGbsaBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
while
(
pos
<
cSim
.
atoms
)
{
float
bornRadius
=
cSim
.
pBornRadii
[
pos
];
float
obcChain
=
cSim
.
pObcChain
[
pos
];
float2
obcData
=
cSim
.
pObcData
[
pos
];
float
totalForce
=
0.0
f
;
float
*
pFt
=
cSim
.
pBornForce
+
pos
;
int
i
=
cSim
.
nonbondOutputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
// __syncthreads();
//printf("%4d: %9.4f %9.4f %9.4f\n", pos, totalForce, bornRadius, obcChain);
//totalForce = 0.0f;
// if (bornRadius > 0.0f)
// {
float
r
=
(
obcData
.
x
+
cSim
.
dielectricOffset
+
cSim
.
probeRadius
);
float
ratio6
=
pow
((
obcData
.
x
+
cSim
.
dielectricOffset
)
/
bornRadius
,
6.0
f
);
//float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
float
saTerm
=
cSim
.
surfaceAreaFactor
*
r
*
r
*
ratio6
;
totalForce
+=
saTerm
/
bornRadius
;
// 1.102 == Temp mysterious fudge factor, FIX FIX FIX
// }
totalForce
*=
bornRadius
*
bornRadius
*
obcChain
;
pFt
=
cSim
.
pBornForce
+
pos
;
*
pFt
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
__global__
void
kReduceObcGbsaBornForces1_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
while
(
pos
<
cSim
.
atoms
)
{
float
bornRadius
=
cSim
.
pBornRadii
[
pos
];
float
obcChain
=
cSim
.
pObcChain
[
pos
];
//float2 obcData = cSim.pObcData[pos];
float
totalForce
=
0.0
f
;
float
*
pFt
=
cSim
.
pBornForce
+
pos
;
int
i
=
cSim
.
nonbondOutputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
// __syncthreads();
//printf("%4d: %9.4f %9.4f %9.4f\n", pos, totalForce, bornRadius, obcChain);
//totalForce = 0.0f;
/*
// if (bornRadius > 0.0f)
// {
float r = (obcData.x + cSim.dielectricOffset + cSim.probeRadius);
float ratio6 = pow((obcData.x + cSim.dielectricOffset) / bornRadius, 6.0f);
float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
totalForce += saTerm / bornRadius; // 1.102 == Temp mysterious fudge factor, FIX FIX FIX
// }
*/
totalForce
*=
bornRadius
*
bornRadius
*
obcChain
;
cSim
.
pBornForce
[
pos
]
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
__global__
void
kAceGbsa_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
while
(
pos
<
cSim
.
atoms
)
{
float
bornRadius
=
cSim
.
pBornRadii
[
pos
];
float
obcChain
=
cSim
.
pObcChain
[
pos
];
float2
obcData
=
cSim
.
pObcData
[
pos
];
float
totalForce
=
cSim
.
pBornForce
[
pos
];
//float totalForce = 0.0f;
float
r
=
(
obcData
.
x
+
cSim
.
dielectricOffset
+
cSim
.
probeRadius
);
float
ratio6
=
pow
((
obcData
.
x
+
cSim
.
dielectricOffset
)
/
bornRadius
,
6.0
f
);
/*
float ratio6 = (obcData.x + cSim.dielectricOffset) / bornRadius;
ratio6 = ratio6*ratio6;
ratio6 = ratio6*ratio6*ratio6;
*/
//float saTerm = 41.84f*cSim.surfaceAreaFactor * r * r * ratio6;
float
saTerm
=
cSim
.
surfaceAreaFactor
*
r
*
r
*
ratio6
;
totalForce
+=
saTerm
/
bornRadius
;
// 1.102 == Temp mysterious fudge factor, FIX FIX FIX
totalForce
*=
bornRadius
*
bornRadius
*
obcChain
;
cSim
.
pBornForce
[
pos
]
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kReduceObcGbsaBornForces
(
gpuContext
gpu
)
{
//printf("kReduceObcGbsaBornForces QQ\n");
kReduceObcGbsaBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bf_reduce_threads_per_block
>>>
();
//kReduceObcGbsaBornForces1_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>();
//kAceGbsa_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>();
//printf("kReduceObcGbsaBornForces calling gpuDumpObcLoop1 QQ\n");
//gpuDumpObcLoop1(gpu);
}
__global__
void
kCalculateObcGbsaForces1_kernel
()
{
// Read queue of work blocks once so the remainder of
// kernel can run asynchronously
int
pos
=
cSim
.
nbWorkUnitsPerBlock
*
blockIdx
.
x
+
min
(
blockIdx
.
x
,
cSim
.
nbWorkUnitsPerBlockRemainder
);
int
end
=
cSim
.
nbWorkUnitsPerBlock
*
(
blockIdx
.
x
+
1
)
+
min
((
blockIdx
.
x
+
1
),
cSim
.
nbWorkUnitsPerBlockRemainder
);
if
(
threadIdx
.
x
<
end
-
pos
)
{
sWorkUnit
[
threadIdx
.
x
]
=
cSim
.
pWorkUnit
[
pos
+
threadIdx
.
x
];
}
if
(
threadIdx
.
x
<
GRID
)
{
sNext
[
threadIdx
.
x
]
=
(
threadIdx
.
x
+
1
)
&
(
GRID
-
1
);
}
__syncthreads
();
// Now change pos and end to reflect work queue just read
// into shared memory
end
=
end
-
pos
;
pos
=
end
-
(
threadIdx
.
x
>>
GRIDBITS
)
-
1
;
while
(
pos
>=
0
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
sWorkUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float4
apos
;
// Local atom x, y, z, q
float4
af
;
// Local atom fx, fy, fz, fb
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
float
br
=
cSim
.
pBornRadii
[
i
];
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
q
=
apos
.
w
;
sA
[
threadIdx
.
x
].
br
=
br
;
af
.
x
=
0.0
f
;
af
.
y
=
0.0
f
;
af
.
z
=
0.0
f
;
af
.
w
=
0.0
f
;
apos
.
w
*=
cSim
.
preFactor
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
alpha2_ij
=
br
*
psA
[
j
].
br
;
float
D_ij
=
r2
/
(
4.0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
apos
.
w
*
psA
[
j
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dr
=
Gpol
*
(
1.0
f
-
0.25
f
*
expTerm
);
float
dGpol_dalpha2_ij
=
-
0.5
f
*
Gpol
*
expTerm
*
(
1.0
f
+
D_ij
);
dx
*=
dGpol_dr
;
dy
*=
dGpol_dr
;
dz
*=
dGpol_dr
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
j
].
br
;
}
// Write results
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
af
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
int
j
=
y
+
tgx
;
unsigned
int
i
=
x
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float
temp1
=
cSim
.
pBornRadii
[
j
];
apos
=
cSim
.
pPosq
[
i
];
float
br
=
cSim
.
pBornRadii
[
i
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
q
=
temp
.
w
;
sA
[
threadIdx
.
x
].
br
=
temp1
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0.0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0.0
f
;
sA
[
threadIdx
.
x
].
fz
=
af
.
z
=
0.0
f
;
sA
[
threadIdx
.
x
].
fb
=
af
.
w
=
0.0
f
;
apos
.
w
*=
cSim
.
preFactor
;
for
(
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
alpha2_ij
=
br
*
psA
[
tj
].
br
;
float
D_ij
=
r2
/
(
4.0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
apos
.
w
*
psA
[
tj
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dr
=
Gpol
*
(
1.0
f
-
0.25
f
*
expTerm
);
float
dGpol_dalpha2_ij
=
-
0.5
f
*
Gpol
*
expTerm
*
(
1.0
f
+
D_ij
);
dx
*=
dGpol_dr
;
dy
*=
dGpol_dr
;
dz
*=
dGpol_dr
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
tj
].
br
;
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
tj
=
sNext
[
tj
];
}
// Write results
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
af
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
af
.
x
=
sA
[
threadIdx
.
x
].
fx
;
af
.
y
=
sA
[
threadIdx
.
x
].
fy
;
af
.
z
=
sA
[
threadIdx
.
x
].
fz
;
af
.
w
=
sA
[
threadIdx
.
x
].
fb
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
af
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
}
pos
-=
cSim
.
nonbond_workBlock
;
}
}
__global__
extern
void
kCalculateObcGbsaForces1_12_kernel
();
void
kCalculateObcGbsaForces1
(
gpuContext
gpu
)
{
//printf("kCalculateObcGbsaForces1 version=%d sm_12=%d QQ\n", gpu->sm_version, SM_12);
if
(
gpu
->
sm_version
<
SM_12
)
kCalculateObcGbsaForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
else
kCalculateObcGbsaForces1_12_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateObcGbsaForce1"
);
}
platforms/cuda/src/kernels/kCalculateObcGbsaForces1_12.cu
deleted
100755 → 0
View file @
a56ce87d
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
#include "gputypes.h"
struct
Atom
{
float
x
;
float
y
;
float
z
;
float
q
;
float
br
;
float
fx
;
float
fy
;
float
fz
;
float
fb
;
};
__shared__
Atom
sA
[
GT2XX_NONBOND_THREADS_PER_BLOCK
];
__shared__
unsigned
int
sWorkUnit
[
GT2XX_NONBOND_WORKUNITS_PER_SM
];
__shared__
unsigned
int
sNext
[
GRID
];
static
__constant__
cudaGmxSimulation
cSim
;
void
SetCalculateObcGbsaForces1_12Sim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetCalculateObcGbsaForces1_12Sim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
__global__
void
kCalculateObcGbsaForces1_12_kernel
()
{
// Read queue of work blocks once so the remainder of
// kernel can run asynchronously
int
pos
=
cSim
.
nbWorkUnitsPerBlock
*
blockIdx
.
x
+
min
(
blockIdx
.
x
,
cSim
.
nbWorkUnitsPerBlockRemainder
);
int
end
=
cSim
.
nbWorkUnitsPerBlock
*
(
blockIdx
.
x
+
1
)
+
min
((
blockIdx
.
x
+
1
),
cSim
.
nbWorkUnitsPerBlockRemainder
);
if
(
threadIdx
.
x
<
end
-
pos
)
{
sWorkUnit
[
threadIdx
.
x
]
=
cSim
.
pWorkUnit
[
pos
+
threadIdx
.
x
];
}
if
(
threadIdx
.
x
<
GRID
)
{
sNext
[
threadIdx
.
x
]
=
(
threadIdx
.
x
+
1
)
&
(
GRID
-
1
);
}
__syncthreads
();
// Now change pos and end to reflect work queue just read
// into shared memory
end
=
end
-
pos
;
pos
=
end
-
(
threadIdx
.
x
>>
GRIDBITS
)
-
1
;
while
(
pos
>=
0
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
sWorkUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float4
apos
;
// Local atom x, y, z, q
float4
af
;
// Local atom fx, fy, fz, fb
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
float
br
=
cSim
.
pBornRadii
[
i
];
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
q
=
apos
.
w
;
sA
[
threadIdx
.
x
].
br
=
br
;
af
.
x
=
0.0
f
;
af
.
y
=
0.0
f
;
af
.
z
=
0.0
f
;
af
.
w
=
0.0
f
;
apos
.
w
*=
cSim
.
preFactor
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
alpha2_ij
=
br
*
psA
[
j
].
br
;
float
D_ij
=
r2
/
(
4.0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
apos
.
w
*
psA
[
j
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dr
=
Gpol
*
(
1.0
f
-
0.25
f
*
expTerm
);
float
dGpol_dalpha2_ij
=
-
0.5
f
*
Gpol
*
expTerm
*
(
1.0
f
+
D_ij
);
dx
*=
dGpol_dr
;
dy
*=
dGpol_dr
;
dz
*=
dGpol_dr
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
j
].
br
;
}
// Write results
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
af
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
int
j
=
y
+
tgx
;
unsigned
int
i
=
x
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float
temp1
=
cSim
.
pBornRadii
[
j
];
apos
=
cSim
.
pPosq
[
i
];
float
br
=
cSim
.
pBornRadii
[
i
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
q
=
temp
.
w
;
sA
[
threadIdx
.
x
].
br
=
temp1
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0.0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0.0
f
;
sA
[
threadIdx
.
x
].
fz
=
af
.
z
=
0.0
f
;
sA
[
threadIdx
.
x
].
fb
=
af
.
w
=
0.0
f
;
apos
.
w
*=
cSim
.
preFactor
;
for
(
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
alpha2_ij
=
br
*
psA
[
tj
].
br
;
float
D_ij
=
r2
/
(
4.0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
apos
.
w
*
psA
[
tj
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dr
=
Gpol
*
(
1.0
f
-
0.25
f
*
expTerm
);
float
dGpol_dalpha2_ij
=
-
0.5
f
*
Gpol
*
expTerm
*
(
1.0
f
+
D_ij
);
dx
*=
dGpol_dr
;
dy
*=
dGpol_dr
;
dz
*=
dGpol_dr
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
tj
].
br
;
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
tj
=
sNext
[
tj
];
}
// Write results
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
af
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
af
.
x
=
sA
[
threadIdx
.
x
].
fx
;
af
.
y
=
sA
[
threadIdx
.
x
].
fy
;
af
.
z
=
sA
[
threadIdx
.
x
].
fz
;
af
.
w
=
sA
[
threadIdx
.
x
].
fb
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
af
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
}
pos
-=
cSim
.
nonbond_workBlock
;
}
}
void
kCalculateObcGbsaForces1_12
(
gpuContext
gpu
)
{
// printf("kCalculateObcGbsaForces1_12\n");
kCalculateObcGbsaForces1_12_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateObcGbsaForce1_12"
);
}
platforms/cuda/src/kernels/kForces.cu
View file @
ff0a69f9
...
@@ -259,3 +259,64 @@ void kReduceForces(gpuContext gpu)
...
@@ -259,3 +259,64 @@ void kReduceForces(gpuContext gpu)
LAUNCHERROR
(
"kReduceForces"
);
LAUNCHERROR
(
"kReduceForces"
);
}
}
__global__
void
kReduceObcGbsaBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
while
(
pos
<
cSim
.
atoms
)
{
float
bornRadius
=
cSim
.
pBornRadii
[
pos
];
float
obcChain
=
cSim
.
pObcChain
[
pos
];
float2
obcData
=
cSim
.
pObcData
[
pos
];
float
totalForce
=
0.0
f
;
float
*
pFt
=
cSim
.
pBornForce
+
pos
;
int
i
=
cSim
.
nonbondOutputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
float
r
=
(
obcData
.
x
+
cSim
.
dielectricOffset
+
cSim
.
probeRadius
);
float
ratio6
=
pow
((
obcData
.
x
+
cSim
.
dielectricOffset
)
/
bornRadius
,
6.0
f
);
//float saTerm = cSim.surfaceAreaFactor * r * r * ratio6;
float
saTerm
=
cSim
.
surfaceAreaFactor
*
r
*
r
*
ratio6
;
totalForce
+=
saTerm
/
bornRadius
;
// 1.102 == Temp mysterious fudge factor, FIX FIX FIX
totalForce
*=
bornRadius
*
bornRadius
*
obcChain
;
pFt
=
cSim
.
pBornForce
+
pos
;
*
pFt
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kReduceObcGbsaBornForces
(
gpuContext
gpu
)
{
//printf("kReduceObcGbsaBornForces\n");
kReduceObcGbsaBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceObcGbsaBornForces"
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment