Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
630bdd40
"platforms/hip/tests/TestHipNoseHooverIntegrator.cpp" did not exist on "64493da66474b3874c798d21d81b23fd5560134c"
Commit
630bdd40
authored
Feb 20, 2009
by
Peter Eastman
Browse files
Added other files that somehow got ommitted from earlier checkin
parent
9b7a9f65
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1762 additions
and
0 deletions
+1762
-0
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+337
-0
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+407
-0
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
+225
-0
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
+290
-0
platforms/cuda/src/kernels/kFindInteractingBlocks.h
platforms/cuda/src/kernels/kFindInteractingBlocks.h
+114
-0
platforms/cuda/src/kernels/kSettle.cu
platforms/cuda/src/kernels/kSettle.cu
+389
-0
No files found.
platforms/cuda/src/kernels/kCalculateCDLJForces.h
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for evalauating nonbonded forces. It is included
* several times in kCalculateCDLJForces.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
int
numWorkUnits
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
int
lasty
=
-
1
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
bool
bExclusionFlag
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float4
apos
;
// Local atom x, y, z, q
float3
af
;
// Local atom fx, fy, fz
float
dx
;
float
dy
;
float
dz
;
float
r2
;
float
invR
;
float
sig
;
float
sig2
;
float
sig6
;
float
eps
;
float
dEdR
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
af
.
x
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
af
.
z
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
q
=
apos
.
w
;
sA
[
threadIdx
.
x
].
sig
=
a
.
x
;
sA
[
threadIdx
.
x
].
eps
=
a
.
y
;
apos
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
j
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
}
}
else
// bExclusion
{
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
j
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
!
(
excl
&
0x1
)
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
}
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
excl
>>=
1
;
}
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4a
[
offset
]
=
of
;
#else
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
#endif
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pAttr
[
j
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
q
=
temp
.
w
;
sA
[
threadIdx
.
x
].
sig
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
eps
=
temp1
.
y
;
}
sA
[
threadIdx
.
x
].
fx
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
0
.
0
f
;
apos
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
tj
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
else
// bExclusion
{
// Read fixed atom data into registers and GRF
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
tj
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
!
(
excl
&
0x1
)
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
}
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4a
[
offset
]
=
of
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4a
[
offset
]
=
of
;
#else
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
of
.
x
=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
sA
[
threadIdx
.
x
].
fz
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
#endif
lasty
=
y
;
}
pos
++
;
}
}
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for evalauating nonbonded forces and the first stage of GBSA.
* It is included several times in kCalculateCDLJObcGbsaForces1.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
,
int
numWorkUnits
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
int
lasty
=
-
1
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
bool
bExclusionFlag
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
float
br
=
cSim
.
pBornRadii
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
float4
af
;
af
.
x
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
af
.
z
=
0
.
0
f
;
af
.
w
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
q
=
apos
.
w
;
float
q2
=
cSim
.
preFactor
*
apos
.
w
;
apos
.
w
*=
cSim
.
epsfac
;
sA
[
threadIdx
.
x
].
sig
=
a
.
x
;
sA
[
threadIdx
.
x
].
eps
=
a
.
y
;
sA
[
threadIdx
.
x
].
br
=
br
;
if
(
!
bExclusionFlag
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
// CDLJ part
float
invR
=
1
.
0
f
/
sqrt
(
r2
);
float
sig
=
a
.
x
+
psA
[
j
].
sig
;
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
a
.
y
*
psA
[
j
].
eps
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
j
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
j
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
j
].
br
;
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
// Add Forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
}
}
else
// bExclusion
{
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
// CDLJ part
float
invR
=
1
.
0
f
/
sqrt
(
r2
);
float
sig
=
a
.
x
+
psA
[
j
].
sig
;
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
a
.
y
*
psA
[
j
].
eps
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
if
(
!
(
excl
&
0x1
))
{
dEdR
=
0
.
0
f
;
}
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
j
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
j
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
j
].
br
;
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
#if defined USE_PERIODIC
if
(
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#elif defined USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
// Add Forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
excl
>>=
1
;
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
float4
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
of
.
w
+=
af
.
w
;
cSim
.
pForce4a
[
offset
]
=
of
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
#else
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
af
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
#endif
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pAttr
[
j
];
sA
[
threadIdx
.
x
].
br
=
cSim
.
pBornRadii
[
j
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
q
=
temp
.
w
;
sA
[
threadIdx
.
x
].
sig
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
eps
=
temp1
.
y
;
}
sA
[
threadIdx
.
x
].
fx
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fb
=
0
.
0
f
;
float
q2
=
apos
.
w
*
cSim
.
preFactor
;
apos
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
{
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
// CDLJ part
float
invR
=
1
.
0
f
/
sqrt
(
r2
);
float
sig
=
a
.
x
+
psA
[
tj
].
sig
;
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
a
.
y
*
psA
[
tj
].
eps
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
tj
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
tj
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
tj
].
br
;
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
else
// bExclusion
{
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
// CDLJ part
float
invR
=
1
.
0
f
/
sqrt
(
r2
);
float
sig
=
a
.
x
+
psA
[
tj
].
sig
;
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
a
.
y
*
psA
[
tj
].
eps
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
if
(
!
(
excl
&
0x1
))
{
dEdR
=
0
.
0
f
;
}
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
tj
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
tj
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
tj
].
br
;
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
#if defined USE_PERIODIC
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#elif defined USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
float4
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
of
.
w
+=
af
.
w
;
cSim
.
pForce4a
[
offset
]
=
of
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
of
.
w
+=
sA
[
threadIdx
.
x
].
fb
;
cSim
.
pForce4a
[
offset
]
=
of
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
#else
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
af
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
af
.
x
=
sA
[
threadIdx
.
x
].
fx
;
af
.
y
=
sA
[
threadIdx
.
x
].
fy
;
af
.
z
=
sA
[
threadIdx
.
x
].
fz
;
af
.
w
=
sA
[
threadIdx
.
x
].
fb
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
af
;
cSim
.
pBornForce
[
offset
]
=
af
.
w
;
#endif
lasty
=
y
;
}
pos
++
;
}
}
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for calculating Born sums. It is included
* several times in kCalculateObcGbsaBornSum.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
,
int
workUnits
)
{
extern
__shared__
Atom
sA
[];
int
end
=
workUnits
/
gridDim
.
x
;
int
pos
=
end
-
(
threadIdx
.
x
>>
GRIDBITS
)
-
1
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
#endif
while
(
pos
>=
0
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
+
(
blockIdx
.
x
*
workUnits
)
/
gridDim
.
x
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
dx
;
float
dy
;
float
dz
;
float
r2
;
float
r
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
// Local atom x, y, z, sum
float2
ar
=
cSim
.
pObcData
[
i
];
// Local atom vr, sr
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
r
=
ar
.
x
;
sA
[
threadIdx
.
x
].
sr
=
ar
.
y
;
apos
.
w
=
0
.
0
f
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#if defined USE_PERIODIC
if
(
i
<
cSim
.
atoms
&&
x
+
j
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
#endif
{
r
=
sqrt
(
r2
);
float
rInverse
=
1
.
0
f
/
r
;
float
rScaledRadiusJ
=
r
+
psA
[
j
].
sr
;
if
((
j
!=
tgx
)
&&
(
ar
.
x
<
rScaledRadiusJ
))
{
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
j
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
apos
.
w
+=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
j
].
sr
*
psA
[
j
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
if
(
ar
.
x
<
(
psA
[
j
].
r
-
r
))
{
apos
.
w
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
}
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
apos
.
w
;
#else
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
apos
.
w
;
#endif
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
int
j
=
y
+
tgx
;
unsigned
int
i
=
x
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pObcData
[
j
];
float4
apos
=
cSim
.
pPosq
[
i
];
// Local atom x, y, z, sum
float2
ar
=
cSim
.
pObcData
[
i
];
// Local atom vr, sr
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
sum
=
apos
.
w
=
0
.
0
f
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_PERIODIC
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
#endif
{
r
=
sqrt
(
r2
);
float
rInverse
=
1
.
0
f
/
r
;
float
rScaledRadiusJ
=
r
+
psA
[
tj
].
sr
;
if
(
ar
.
x
<
rScaledRadiusJ
)
{
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
tj
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
tj
].
sr
*
psA
[
tj
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
if
(
ar
.
x
<
(
psA
[
tj
].
sr
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
apos
.
w
+=
term
;
}
float
rScaledRadiusI
=
r
+
ar
.
y
;
if
(
psA
[
tj
].
r
<
rScaledRadiusI
)
{
float
l_ij
=
1
.
0
f
/
max
(
psA
[
tj
].
r
,
fabs
(
r
-
ar
.
y
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
ar
.
y
*
ar
.
y
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
if
(
psA
[
tj
].
r
<
(
ar
.
y
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
psA
[
tj
].
r
)
-
l_ij
);
}
psA
[
tj
].
sum
+=
term
;
}
}
tj
=
(
tj
-
1
)
&
(
GRID
-
1
);
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
apos
.
w
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
sA
[
threadIdx
.
x
].
sum
;
#else
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
apos
.
w
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
sA
[
threadIdx
.
x
].
sum
;
#endif
}
pos
-=
cSim
.
nonbond_workBlock
;
}
}
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateObcGbsaForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
,
int
numWorkUnits
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
int
lasty
=
-
1
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pObcData
[
i
];
float
fb
=
cSim
.
pBornForce
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
float3
af
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
af
.
z
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
r
=
a
.
x
;
sA
[
threadIdx
.
x
].
sr
=
a
.
y
;
sA
[
threadIdx
.
x
].
sr2
=
a
.
y
*
a
.
y
;
sA
[
threadIdx
.
x
].
fb
=
fb
;
for
(
unsigned
int
j
=
(
tgx
+
1
)
&
(
GRID
-
1
);
j
!=
tgx
;
j
=
(
j
+
1
)
&
(
GRID
-
1
))
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
// Atom I Born forces and sum
float
rScaledRadiusJ
=
r
+
psA
[
j
].
sr
;
float
l_ij
=
1
.
0
f
/
max
(
a
.
x
,
fabs
(
r
-
psA
[
j
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
rInverse
=
1
.
0
f
/
r
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
r2Inverse
=
rInverse
*
rInverse
;
float
t1
=
log
(
u_ij
/
l_ij
);
float
t2
=
(
l_ij2
-
u_ij2
);
float
t3
=
t2
*
rInverse
;
t1
*=
rInverse
;
// Born Forces term
float
term
=
0
.
125
f
*
(
1
.
000
f
+
psA
[
j
].
sr2
*
r2Inverse
)
*
t3
+
0
.
250
f
*
t1
*
r2Inverse
;
float
dE
=
fb
*
term
;
#if defined USE_PERIODIC
if
(
a
.
x
>=
rScaledRadiusJ
||
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
a
.
x
>=
rScaledRadiusJ
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
a
.
x
>=
rScaledRadiusJ
)
#endif
{
dE
=
0
.
0
f
;
}
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
psA
[
j
].
fx
+=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
psA
[
j
].
fy
+=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
psA
[
j
].
fz
+=
d
;
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4b
[
offset
];
of
.
x
+=
af
.
x
+
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
af
.
y
+
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
af
.
z
+
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4b
[
offset
]
=
of
;
#else
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
of
.
x
=
af
.
x
+
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
af
.
y
+
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
af
.
z
+
sA
[
threadIdx
.
x
].
fz
;
of
.
w
=
0
.
0
f
;
cSim
.
pForce4b
[
offset
]
=
of
;
#endif
}
else
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pObcData
[
j
];
sA
[
threadIdx
.
x
].
fb
=
cSim
.
pBornForce
[
j
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
sr2
=
temp1
.
y
*
temp1
.
y
;
}
float
sr2
=
a
.
y
*
a
.
y
;
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
// Interleaved Atom I and J Born Forces and sum components
float
r2Inverse
=
1
.
0
f
/
r2
;
float
rScaledRadiusJ
=
r
+
psA
[
tj
].
sr
;
float
rScaledRadiusI
=
r
+
a
.
y
;
float
rInverse
=
1
.
0
f
/
r
;
float
l_ijJ
=
1
.
0
f
/
max
(
a
.
x
,
fabs
(
r
-
psA
[
tj
].
sr
));
float
l_ijI
=
1
.
0
f
/
max
(
psA
[
tj
].
r
,
fabs
(
r
-
a
.
y
));
float
u_ijJ
=
1
.
0
f
/
rScaledRadiusJ
;
float
u_ijI
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2J
=
l_ijJ
*
l_ijJ
;
float
l_ij2I
=
l_ijI
*
l_ijI
;
float
u_ij2J
=
u_ijJ
*
u_ijJ
;
float
u_ij2I
=
u_ijI
*
u_ijI
;
float
t1J
=
log
(
u_ijJ
/
l_ijJ
);
float
t1I
=
log
(
u_ijI
/
l_ijI
);
float
t2J
=
(
l_ij2J
-
u_ij2J
);
float
t2I
=
(
l_ij2I
-
u_ij2I
);
float
t3J
=
t2J
*
rInverse
;
float
t3I
=
t2I
*
rInverse
;
t1J
*=
rInverse
;
t1I
*=
rInverse
;
// Born Forces term
float
term
=
0
.
125
f
*
(
1
.
000
f
+
psA
[
tj
].
sr2
*
r2Inverse
)
*
t3J
+
0
.
250
f
*
t1J
*
r2Inverse
;
float
dE
=
fb
*
term
;
#if defined USE_PERIODIC
if
(
a
.
x
>=
rScaledRadiusJ
||
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
a
.
x
>=
rScaledRadiusJ
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
a
.
x
>=
rScaledRadiusJ
)
#endif
{
dE
=
0
.
0
f
;
}
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
psA
[
tj
].
fx
+=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
psA
[
tj
].
fy
+=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
psA
[
tj
].
fz
+=
d
;
// Atom J Born sum term
term
=
0
.
125
f
*
(
1
.
000
f
+
sr2
*
r2Inverse
)
*
t3I
+
0
.
250
f
*
t1I
*
r2Inverse
;
dE
=
psA
[
tj
].
fb
*
term
;
#ifdef USE_PERIODIC
if
(
psA
[
tj
].
r
>=
rScaledRadiusI
||
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
psA
[
tj
].
r
>=
rScaledRadiusI
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
psA
[
tj
].
r
>=
rScaledRadiusI
)
#endif
{
dE
=
0
.
0
f
;
}
dx
*=
dE
;
dy
*=
dE
;
dz
*=
dE
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4b
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4b
[
offset
]
=
of
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4b
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4b
[
offset
]
=
of
;
#else
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
cSim
.
pForce4b
[
offset
]
=
of
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
of
.
x
=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4b
[
offset
]
=
of
;
#endif
}
lasty
=
y
;
pos
++
;
}
}
platforms/cuda/src/kernels/kFindInteractingBlocks.h
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for identifying interacting blocks. It is included
* several times in kCalculateCDLJForces.cu with different #defines to generate
* different versions of the kernels.
*/
/**
* Find a bounding box for the atoms in each block.
*/
__global__
void
METHOD_NAME
(
kFindBlockBounds
,
_kernel
)()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
base
=
pos
<<
GRIDBITS
;
if
(
base
<
cSim
.
atoms
)
{
float4
apos
=
cSim
.
pPosq
[
base
];
#ifdef USE_PERIODIC
apos
.
x
-=
floor
(
apos
.
x
/
cSim
.
periodicBoxSizeX
)
*
cSim
.
periodicBoxSizeX
;
apos
.
y
-=
floor
(
apos
.
y
/
cSim
.
periodicBoxSizeY
)
*
cSim
.
periodicBoxSizeY
;
apos
.
z
-=
floor
(
apos
.
z
/
cSim
.
periodicBoxSizeZ
)
*
cSim
.
periodicBoxSizeZ
;
float4
firstPoint
=
apos
;
#endif
float
minx
=
apos
.
x
;
float
maxx
=
apos
.
x
;
float
miny
=
apos
.
y
;
float
maxy
=
apos
.
y
;
float
minz
=
apos
.
z
;
float
maxz
=
apos
.
z
;
for
(
unsigned
int
i
=
1
;
i
<
GRID
;
i
++
)
{
apos
=
cSim
.
pPosq
[
base
+
i
];
#ifdef USE_PERIODIC
apos
.
x
-=
floor
((
apos
.
x
-
firstPoint
.
x
)
/
cSim
.
periodicBoxSizeX
+
0
.
5
)
*
cSim
.
periodicBoxSizeX
;
apos
.
y
-=
floor
((
apos
.
y
-
firstPoint
.
y
)
/
cSim
.
periodicBoxSizeY
+
0
.
5
)
*
cSim
.
periodicBoxSizeY
;
apos
.
z
-=
floor
((
apos
.
z
-
firstPoint
.
z
)
/
cSim
.
periodicBoxSizeZ
+
0
.
5
)
*
cSim
.
periodicBoxSizeZ
;
#endif
minx
=
min
(
minx
,
apos
.
x
);
maxx
=
max
(
maxx
,
apos
.
x
);
miny
=
min
(
miny
,
apos
.
y
);
maxy
=
max
(
maxy
,
apos
.
y
);
minz
=
min
(
minz
,
apos
.
z
);
maxz
=
max
(
maxz
,
apos
.
z
);
}
cSim
.
pGridBoundingBox
[
pos
]
=
make_float4
(
0
.
5
f
*
(
maxx
-
minx
),
0
.
5
f
*
(
maxy
-
miny
),
0
.
5
f
*
(
maxz
-
minz
),
0
);
cSim
.
pGridCenter
[
pos
]
=
make_float4
(
0
.
5
f
*
(
maxx
+
minx
),
0
.
5
f
*
(
maxy
+
miny
),
0
.
5
f
*
(
maxz
+
minz
),
0
);
}
}
/**
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting.
*/
__global__
void
METHOD_NAME
(
kFindBlocksWithInteractions
,
_kernel
)()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
pos
<
cSim
.
workUnits
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
cSim
.
pWorkUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
);
x
=
(
x
>>
17
);
// Find the distance between the bounding boxes of the two cells.
float4
centera
=
cSim
.
pGridCenter
[
x
];
float4
centerb
=
cSim
.
pGridCenter
[
y
];
float
dx
=
centera
.
x
-
centerb
.
x
;
float
dy
=
centera
.
y
-
centerb
.
y
;
float
dz
=
centera
.
z
-
centerb
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float4
boxSizea
=
cSim
.
pGridBoundingBox
[
x
];
float4
boxSizeb
=
cSim
.
pGridBoundingBox
[
y
];
dx
=
max
(
0
.
0
f
,
abs
(
dx
)
-
boxSizea
.
x
-
boxSizeb
.
x
);
dy
=
max
(
0
.
0
f
,
abs
(
dy
)
-
boxSizea
.
y
-
boxSizeb
.
y
);
dz
=
max
(
0
.
0
f
,
abs
(
dz
)
-
boxSizea
.
z
-
boxSizeb
.
z
);
cSim
.
pInteractionFlag
[
pos
]
=
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cSim
.
nonbondedCutoffSqr
?
0
:
1
);
}
}
platforms/cuda/src/kernels/kSettle.cu
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
//#include <fstream>
using
namespace
std
;
#define DeltaShake
#include "gputypes.h"
static
__constant__
cudaGmxSimulation
cSim
;
void
SetSettleSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetSettleSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
/**
* This is based on the setlep FORTRAN routine by Shuichi Miyamoto. See
* S. Miyamoto and P. Kollman, J. Comp. Chem., vol 13, no. 8, pp. 952-962 (1992).
*/
__global__
void
kApplyFirstSettle_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
settleConstraints
)
{
int4
atomID
=
cSim
.
pSettleID
[
pos
];
float2
params
=
cSim
.
pSettleParameter
[
pos
];
float4
apos0
=
cSim
.
pOldPosq
[
atomID
.
x
];
float4
xp0
=
cSim
.
pPosqP
[
atomID
.
x
];
float4
apos1
=
cSim
.
pOldPosq
[
atomID
.
y
];
float4
xp1
=
cSim
.
pPosqP
[
atomID
.
y
];
float4
apos2
=
cSim
.
pOldPosq
[
atomID
.
z
];
float4
xp2
=
cSim
.
pPosqP
[
atomID
.
z
];
float
m0
=
1.0
/
cSim
.
pVelm4
[
atomID
.
x
].
w
;
float
m1
=
1.0
/
cSim
.
pVelm4
[
atomID
.
y
].
w
;
float
m2
=
1.0
/
cSim
.
pVelm4
[
atomID
.
z
].
w
;
float
xb0
=
apos1
.
x
-
apos0
.
x
;
float
yb0
=
apos1
.
y
-
apos0
.
y
;
float
zb0
=
apos1
.
z
-
apos0
.
z
;
float
xc0
=
apos2
.
x
-
apos0
.
x
;
float
yc0
=
apos2
.
y
-
apos0
.
y
;
float
zc0
=
apos2
.
z
-
apos0
.
z
;
float
dist1
=
sqrt
(
xb0
*
xb0
+
yb0
*
yb0
+
zb0
*
zb0
);
float
dist2
=
sqrt
(
xc0
*
xc0
+
yc0
*
yc0
+
zc0
*
zc0
);
float
totalMass
=
m0
+
m1
+
m2
;
float
xcom
=
((
apos0
.
x
+
xp0
.
x
)
*
m0
+
(
apos1
.
x
+
xp1
.
x
)
*
m1
+
(
apos2
.
x
+
xp2
.
x
)
*
m2
)
/
totalMass
;
float
ycom
=
((
apos0
.
y
+
xp0
.
y
)
*
m0
+
(
apos1
.
y
+
xp1
.
y
)
*
m1
+
(
apos2
.
y
+
xp2
.
y
)
*
m2
)
/
totalMass
;
float
zcom
=
((
apos0
.
z
+
xp0
.
z
)
*
m0
+
(
apos1
.
z
+
xp1
.
z
)
*
m1
+
(
apos2
.
z
+
xp2
.
z
)
*
m2
)
/
totalMass
;
float
xa1
=
apos0
.
x
+
xp0
.
x
-
xcom
;
float
ya1
=
apos0
.
y
+
xp0
.
y
-
ycom
;
float
za1
=
apos0
.
z
+
xp0
.
z
-
zcom
;
float
xb1
=
apos1
.
x
+
xp1
.
x
-
xcom
;
float
yb1
=
apos1
.
y
+
xp1
.
y
-
ycom
;
float
zb1
=
apos1
.
z
+
xp1
.
z
-
zcom
;
float
xc1
=
apos2
.
x
+
xp2
.
x
-
xcom
;
float
yc1
=
apos2
.
y
+
xp2
.
y
-
ycom
;
float
zc1
=
apos2
.
z
+
xp2
.
z
-
zcom
;
float
xaksZd
=
yb0
*
zc0
-
zb0
*
yc0
;
float
yaksZd
=
zb0
*
xc0
-
xb0
*
zc0
;
float
zaksZd
=
xb0
*
yc0
-
yb0
*
xc0
;
float
xaksXd
=
ya1
*
zaksZd
-
za1
*
yaksZd
;
float
yaksXd
=
za1
*
xaksZd
-
xa1
*
zaksZd
;
float
zaksXd
=
xa1
*
yaksZd
-
ya1
*
xaksZd
;
float
xaksYd
=
yaksZd
*
zaksXd
-
zaksZd
*
yaksXd
;
float
yaksYd
=
zaksZd
*
xaksXd
-
xaksZd
*
zaksXd
;
float
zaksYd
=
xaksZd
*
yaksXd
-
yaksZd
*
xaksXd
;
float
axlng
=
sqrt
(
xaksXd
*
xaksXd
+
yaksXd
*
yaksXd
+
zaksXd
*
zaksXd
);
float
aylng
=
sqrt
(
xaksYd
*
xaksYd
+
yaksYd
*
yaksYd
+
zaksYd
*
zaksYd
);
float
azlng
=
sqrt
(
xaksZd
*
xaksZd
+
yaksZd
*
yaksZd
+
zaksZd
*
zaksZd
);
float
trns11
=
xaksXd
/
axlng
;
float
trns21
=
yaksXd
/
axlng
;
float
trns31
=
zaksXd
/
axlng
;
float
trns12
=
xaksYd
/
aylng
;
float
trns22
=
yaksYd
/
aylng
;
float
trns32
=
zaksYd
/
aylng
;
float
trns13
=
xaksZd
/
azlng
;
float
trns23
=
yaksZd
/
azlng
;
float
trns33
=
zaksZd
/
azlng
;
float
xb0d
=
trns11
*
xb0
+
trns21
*
yb0
+
trns31
*
zb0
;
float
yb0d
=
trns12
*
xb0
+
trns22
*
yb0
+
trns32
*
zb0
;
float
xc0d
=
trns11
*
xc0
+
trns21
*
yc0
+
trns31
*
zc0
;
float
yc0d
=
trns12
*
xc0
+
trns22
*
yc0
+
trns32
*
zc0
;
// float xa1d = trns11*xa1 + trns21*ya1 + trns31*za1;
// float ya1d = trns12*xa1 + trns22*ya1 + trns32*za1;
float
za1d
=
trns13
*
xa1
+
trns23
*
ya1
+
trns33
*
za1
;
float
xb1d
=
trns11
*
xb1
+
trns21
*
yb1
+
trns31
*
zb1
;
float
yb1d
=
trns12
*
xb1
+
trns22
*
yb1
+
trns32
*
zb1
;
float
zb1d
=
trns13
*
xb1
+
trns23
*
yb1
+
trns33
*
zb1
;
float
xc1d
=
trns11
*
xc1
+
trns21
*
yc1
+
trns31
*
zc1
;
float
yc1d
=
trns12
*
xc1
+
trns22
*
yc1
+
trns32
*
zc1
;
float
zc1d
=
trns13
*
xc1
+
trns23
*
yc1
+
trns33
*
zc1
;
// --- Step2 A2' ---
float
rc
=
0.5
*
params
.
y
;
float
rb
=
sqrt
(
params
.
x
*
params
.
x
-
rc
*
rc
);
float
ra
=
rb
*
(
m1
+
m2
)
/
totalMass
;
rb
-=
ra
;
float
sinphi
=
za1d
/
ra
;
float
cosphi
=
sqrt
(
1.0
-
sinphi
*
sinphi
);
float
sinpsi
=
(
zb1d
-
zc1d
)
/
(
2
*
rc
*
cosphi
);
float
cospsi
=
sqrt
(
1.0
-
sinpsi
*
sinpsi
);
float
ya2d
=
ra
*
cosphi
;
float
xb2d
=
-
rc
*
cospsi
;
float
yb2d
=
-
rb
*
cosphi
-
rc
*
sinpsi
*
sinphi
;
float
yc2d
=
-
rb
*
cosphi
+
rc
*
sinpsi
*
sinphi
;
float
xb2d2
=
xb2d
*
xb2d
;
float
hh2
=
4.0
*
xb2d2
+
(
yb2d
-
yc2d
)
*
(
yb2d
-
yc2d
)
+
(
zb1d
-
zc1d
)
*
(
zb1d
-
zc1d
);
float
deltx
=
2.0
*
xb2d
+
sqrt
(
4.0
*
xb2d2
-
hh2
+
params
.
y
*
params
.
y
);
xb2d
-=
deltx
*
0.5
;
// --- Step3 al,be,ga ---
float
alpa
=
(
xb2d
*
(
xb0d
-
xc0d
)
+
yb0d
*
yb2d
+
yc0d
*
yc2d
);
float
beta
=
(
xb2d
*
(
yc0d
-
yb0d
)
+
xb0d
*
yb2d
+
xc0d
*
yc2d
);
float
gama
=
xb0d
*
yb1d
-
xb1d
*
yb0d
+
xc0d
*
yc1d
-
xc1d
*
yc0d
;
float
al2be2
=
alpa
*
alpa
+
beta
*
beta
;
float
sinthe
=
(
alpa
*
gama
-
beta
*
sqrt
(
al2be2
-
gama
*
gama
)
)
/
al2be2
;
// --- Step4 A3' ---
float
costhe
=
sqrt
(
1.0
-
sinthe
*
sinthe
);
float
xa3d
=
-
ya2d
*
sinthe
;
float
ya3d
=
ya2d
*
costhe
;
float
za3d
=
za1d
;
float
xb3d
=
xb2d
*
costhe
-
yb2d
*
sinthe
;
float
yb3d
=
xb2d
*
sinthe
+
yb2d
*
costhe
;
float
zb3d
=
zb1d
;
float
xc3d
=
-
xb2d
*
costhe
-
yc2d
*
sinthe
;
float
yc3d
=
-
xb2d
*
sinthe
+
yc2d
*
costhe
;
float
zc3d
=
zc1d
;
// --- Step5 A3 ---
float
xa3
=
trns11
*
xa3d
+
trns12
*
ya3d
+
trns13
*
za3d
;
float
ya3
=
trns21
*
xa3d
+
trns22
*
ya3d
+
trns23
*
za3d
;
float
za3
=
trns31
*
xa3d
+
trns32
*
ya3d
+
trns33
*
za3d
;
float
xb3
=
trns11
*
xb3d
+
trns12
*
yb3d
+
trns13
*
zb3d
;
float
yb3
=
trns21
*
xb3d
+
trns22
*
yb3d
+
trns23
*
zb3d
;
float
zb3
=
trns31
*
xb3d
+
trns32
*
yb3d
+
trns33
*
zb3d
;
float
xc3
=
trns11
*
xc3d
+
trns12
*
yc3d
+
trns13
*
zc3d
;
float
yc3
=
trns21
*
xc3d
+
trns22
*
yc3d
+
trns23
*
zc3d
;
float
zc3
=
trns31
*
xc3d
+
trns32
*
yc3d
+
trns33
*
zc3d
;
xp0
.
x
=
xcom
+
xa3
-
apos0
.
x
;
xp0
.
y
=
ycom
+
ya3
-
apos0
.
y
;
xp0
.
z
=
zcom
+
za3
-
apos0
.
z
;
xp1
.
x
=
xcom
+
xb3
-
apos1
.
x
;
xp1
.
y
=
ycom
+
yb3
-
apos1
.
y
;
xp1
.
z
=
zcom
+
zb3
-
apos1
.
z
;
xp2
.
x
=
xcom
+
xc3
-
apos2
.
x
;
xp2
.
y
=
ycom
+
yc3
-
apos2
.
y
;
xp2
.
z
=
zcom
+
zc3
-
apos2
.
z
;
cSim
.
pPosqP
[
atomID
.
x
]
=
xp0
;
cSim
.
pPosqP
[
atomID
.
y
]
=
xp1
;
cSim
.
pPosqP
[
atomID
.
z
]
=
xp2
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
void
kApplyFirstSettle
(
gpuContext
gpu
)
{
// printf("kApplyFirstSettle\n");
if
(
gpu
->
sim
.
settleConstraints
>
0
)
{
kApplyFirstSettle_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
settle_threads_per_block
>>>
();
LAUNCHERROR
(
"kApplyFirstSettle"
);
}
}
__global__
void
kApplySecondSettle_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
settleConstraints
)
{
int4
atomID
=
cSim
.
pSettleID
[
pos
];
float2
params
=
cSim
.
pSettleParameter
[
pos
];
float4
apos0
=
cSim
.
pOldPosq
[
atomID
.
x
];
float4
xp0
=
cSim
.
pPosq
[
atomID
.
x
];
float4
apos1
=
cSim
.
pOldPosq
[
atomID
.
y
];
float4
xp1
=
cSim
.
pPosq
[
atomID
.
y
];
float4
apos2
=
cSim
.
pOldPosq
[
atomID
.
z
];
float4
xp2
=
cSim
.
pPosq
[
atomID
.
z
];
float
m0
=
1.0
/
cSim
.
pVelm4
[
atomID
.
x
].
w
;
float
m1
=
1.0
/
cSim
.
pVelm4
[
atomID
.
y
].
w
;
float
m2
=
1.0
/
cSim
.
pVelm4
[
atomID
.
z
].
w
;
float
xb0
=
apos1
.
x
-
apos0
.
x
;
float
yb0
=
apos1
.
y
-
apos0
.
y
;
float
zb0
=
apos1
.
z
-
apos0
.
z
;
float
xc0
=
apos2
.
x
-
apos0
.
x
;
float
yc0
=
apos2
.
y
-
apos0
.
y
;
float
zc0
=
apos2
.
z
-
apos0
.
z
;
float
dist1
=
sqrt
(
xb0
*
xb0
+
yb0
*
yb0
+
zb0
*
zb0
);
float
dist2
=
sqrt
(
xc0
*
xc0
+
yc0
*
yc0
+
zc0
*
zc0
);
float
totalMass
=
m0
+
m1
+
m2
;
float
xcom
=
((
apos0
.
x
+
xp0
.
x
)
*
m0
+
(
apos1
.
x
+
xp1
.
x
)
*
m1
+
(
apos2
.
x
+
xp2
.
x
)
*
m2
)
/
totalMass
;
float
ycom
=
((
apos0
.
y
+
xp0
.
y
)
*
m0
+
(
apos1
.
y
+
xp1
.
y
)
*
m1
+
(
apos2
.
y
+
xp2
.
y
)
*
m2
)
/
totalMass
;
float
zcom
=
((
apos0
.
z
+
xp0
.
z
)
*
m0
+
(
apos1
.
z
+
xp1
.
z
)
*
m1
+
(
apos2
.
z
+
xp2
.
z
)
*
m2
)
/
totalMass
;
float
xa1
=
apos0
.
x
+
xp0
.
x
-
xcom
;
float
ya1
=
apos0
.
y
+
xp0
.
y
-
ycom
;
float
za1
=
apos0
.
z
+
xp0
.
z
-
zcom
;
float
xb1
=
apos1
.
x
+
xp1
.
x
-
xcom
;
float
yb1
=
apos1
.
y
+
xp1
.
y
-
ycom
;
float
zb1
=
apos1
.
z
+
xp1
.
z
-
zcom
;
float
xc1
=
apos2
.
x
+
xp2
.
x
-
xcom
;
float
yc1
=
apos2
.
y
+
xp2
.
y
-
ycom
;
float
zc1
=
apos2
.
z
+
xp2
.
z
-
zcom
;
float
xaksZd
=
yb0
*
zc0
-
zb0
*
yc0
;
float
yaksZd
=
zb0
*
xc0
-
xb0
*
zc0
;
float
zaksZd
=
xb0
*
yc0
-
yb0
*
xc0
;
float
xaksXd
=
ya1
*
zaksZd
-
za1
*
yaksZd
;
float
yaksXd
=
za1
*
xaksZd
-
xa1
*
zaksZd
;
float
zaksXd
=
xa1
*
yaksZd
-
ya1
*
xaksZd
;
float
xaksYd
=
yaksZd
*
zaksXd
-
zaksZd
*
yaksXd
;
float
yaksYd
=
zaksZd
*
xaksXd
-
xaksZd
*
zaksXd
;
float
zaksYd
=
xaksZd
*
yaksXd
-
yaksZd
*
xaksXd
;
float
axlng
=
sqrt
(
xaksXd
*
xaksXd
+
yaksXd
*
yaksXd
+
zaksXd
*
zaksXd
);
float
aylng
=
sqrt
(
xaksYd
*
xaksYd
+
yaksYd
*
yaksYd
+
zaksYd
*
zaksYd
);
float
azlng
=
sqrt
(
xaksZd
*
xaksZd
+
yaksZd
*
yaksZd
+
zaksZd
*
zaksZd
);
float
trns11
=
xaksXd
/
axlng
;
float
trns21
=
yaksXd
/
axlng
;
float
trns31
=
zaksXd
/
axlng
;
float
trns12
=
xaksYd
/
aylng
;
float
trns22
=
yaksYd
/
aylng
;
float
trns32
=
zaksYd
/
aylng
;
float
trns13
=
xaksZd
/
azlng
;
float
trns23
=
yaksZd
/
azlng
;
float
trns33
=
zaksZd
/
azlng
;
float
xb0d
=
trns11
*
xb0
+
trns21
*
yb0
+
trns31
*
zb0
;
float
yb0d
=
trns12
*
xb0
+
trns22
*
yb0
+
trns32
*
zb0
;
float
xc0d
=
trns11
*
xc0
+
trns21
*
yc0
+
trns31
*
zc0
;
float
yc0d
=
trns12
*
xc0
+
trns22
*
yc0
+
trns32
*
zc0
;
// float xa1d = trns11*xa1 + trns21*ya1 + trns31*za1;
// float ya1d = trns12*xa1 + trns22*ya1 + trns32*za1;
float
za1d
=
trns13
*
xa1
+
trns23
*
ya1
+
trns33
*
za1
;
float
xb1d
=
trns11
*
xb1
+
trns21
*
yb1
+
trns31
*
zb1
;
float
yb1d
=
trns12
*
xb1
+
trns22
*
yb1
+
trns32
*
zb1
;
float
zb1d
=
trns13
*
xb1
+
trns23
*
yb1
+
trns33
*
zb1
;
float
xc1d
=
trns11
*
xc1
+
trns21
*
yc1
+
trns31
*
zc1
;
float
yc1d
=
trns12
*
xc1
+
trns22
*
yc1
+
trns32
*
zc1
;
float
zc1d
=
trns13
*
xc1
+
trns23
*
yc1
+
trns33
*
zc1
;
// --- Step2 A2' ---
float
rc
=
0.5
*
params
.
y
;
float
rb
=
sqrt
(
params
.
x
*
params
.
x
-
rc
*
rc
);
float
ra
=
rb
*
(
m1
+
m2
)
/
totalMass
;
rb
-=
ra
;
float
sinphi
=
za1d
/
ra
;
float
cosphi
=
sqrt
(
1.0
-
sinphi
*
sinphi
);
float
sinpsi
=
(
zb1d
-
zc1d
)
/
(
2
*
rc
*
cosphi
);
float
cospsi
=
sqrt
(
1.0
-
sinpsi
*
sinpsi
);
float
ya2d
=
ra
*
cosphi
;
float
xb2d
=
-
rc
*
cospsi
;
float
yb2d
=
-
rb
*
cosphi
-
rc
*
sinpsi
*
sinphi
;
float
yc2d
=
-
rb
*
cosphi
+
rc
*
sinpsi
*
sinphi
;
float
xb2d2
=
xb2d
*
xb2d
;
float
hh2
=
4.0
*
xb2d2
+
(
yb2d
-
yc2d
)
*
(
yb2d
-
yc2d
)
+
(
zb1d
-
zc1d
)
*
(
zb1d
-
zc1d
);
float
deltx
=
2.0
*
xb2d
+
sqrt
(
4.0
*
xb2d2
-
hh2
+
params
.
y
*
params
.
y
);
xb2d
-=
deltx
*
0.5
;
// --- Step3 al,be,ga ---
float
alpa
=
(
xb2d
*
(
xb0d
-
xc0d
)
+
yb0d
*
yb2d
+
yc0d
*
yc2d
);
float
beta
=
(
xb2d
*
(
yc0d
-
yb0d
)
+
xb0d
*
yb2d
+
xc0d
*
yc2d
);
float
gama
=
xb0d
*
yb1d
-
xb1d
*
yb0d
+
xc0d
*
yc1d
-
xc1d
*
yc0d
;
float
al2be2
=
alpa
*
alpa
+
beta
*
beta
;
float
sinthe
=
(
alpa
*
gama
-
beta
*
sqrt
(
al2be2
-
gama
*
gama
)
)
/
al2be2
;
// --- Step4 A3' ---
float
costhe
=
sqrt
(
1.0
-
sinthe
*
sinthe
);
float
xa3d
=
-
ya2d
*
sinthe
;
float
ya3d
=
ya2d
*
costhe
;
float
za3d
=
za1d
;
float
xb3d
=
xb2d
*
costhe
-
yb2d
*
sinthe
;
float
yb3d
=
xb2d
*
sinthe
+
yb2d
*
costhe
;
float
zb3d
=
zb1d
;
float
xc3d
=
-
xb2d
*
costhe
-
yc2d
*
sinthe
;
float
yc3d
=
-
xb2d
*
sinthe
+
yc2d
*
costhe
;
float
zc3d
=
zc1d
;
// --- Step5 A3 ---
float
xa3
=
trns11
*
xa3d
+
trns12
*
ya3d
+
trns13
*
za3d
;
float
ya3
=
trns21
*
xa3d
+
trns22
*
ya3d
+
trns23
*
za3d
;
float
za3
=
trns31
*
xa3d
+
trns32
*
ya3d
+
trns33
*
za3d
;
float
xb3
=
trns11
*
xb3d
+
trns12
*
yb3d
+
trns13
*
zb3d
;
float
yb3
=
trns21
*
xb3d
+
trns22
*
yb3d
+
trns23
*
zb3d
;
float
zb3
=
trns31
*
xb3d
+
trns32
*
yb3d
+
trns33
*
zb3d
;
float
xc3
=
trns11
*
xc3d
+
trns12
*
yc3d
+
trns13
*
zc3d
;
float
yc3
=
trns21
*
xc3d
+
trns22
*
yc3d
+
trns23
*
zc3d
;
float
zc3
=
trns31
*
xc3d
+
trns32
*
yc3d
+
trns33
*
zc3d
;
xp0
.
x
=
xcom
+
xa3
;
xp0
.
y
=
ycom
+
ya3
;
xp0
.
z
=
zcom
+
za3
;
xp1
.
x
=
xcom
+
xb3
;
xp1
.
y
=
ycom
+
yb3
;
xp1
.
z
=
zcom
+
zb3
;
xp2
.
x
=
xcom
+
xc3
;
xp2
.
y
=
ycom
+
yc3
;
xp2
.
z
=
zcom
+
zc3
;
cSim
.
pPosq
[
atomID
.
x
]
=
xp0
;
cSim
.
pPosq
[
atomID
.
y
]
=
xp1
;
cSim
.
pPosq
[
atomID
.
z
]
=
xp2
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
void
kApplySecondSettle
(
gpuContext
gpu
)
{
// printf("kApplySecondSettle\n");
if
(
gpu
->
sim
.
settleConstraints
>
0
)
{
kApplySecondSettle_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
settle_threads_per_block
>>>
();
LAUNCHERROR
(
"kApplySecondSettle"
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment