Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
630bdd40
Commit
630bdd40
authored
Feb 20, 2009
by
Peter Eastman
Browse files
Added other files that somehow got ommitted from earlier checkin
parent
9b7a9f65
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1762 additions
and
0 deletions
+1762
-0
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+337
-0
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+407
-0
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
+225
-0
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
+290
-0
platforms/cuda/src/kernels/kFindInteractingBlocks.h
platforms/cuda/src/kernels/kFindInteractingBlocks.h
+114
-0
platforms/cuda/src/kernels/kSettle.cu
platforms/cuda/src/kernels/kSettle.cu
+389
-0
No files found.
platforms/cuda/src/kernels/kCalculateCDLJForces.h
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for evalauating nonbonded forces. It is included
* several times in kCalculateCDLJForces.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
int
numWorkUnits
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
int
lasty
=
-
1
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
bool
bExclusionFlag
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float4
apos
;
// Local atom x, y, z, q
float3
af
;
// Local atom fx, fy, fz
float
dx
;
float
dy
;
float
dz
;
float
r2
;
float
invR
;
float
sig
;
float
sig2
;
float
sig6
;
float
eps
;
float
dEdR
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
af
.
x
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
af
.
z
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
q
=
apos
.
w
;
sA
[
threadIdx
.
x
].
sig
=
a
.
x
;
sA
[
threadIdx
.
x
].
eps
=
a
.
y
;
apos
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
j
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
}
}
else
// bExclusion
{
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
j
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
!
(
excl
&
0x1
)
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
}
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
excl
>>=
1
;
}
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4a
[
offset
]
=
of
;
#else
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
#endif
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pAttr
[
j
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
q
=
temp
.
w
;
sA
[
threadIdx
.
x
].
sig
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
eps
=
temp1
.
y
;
}
sA
[
threadIdx
.
x
].
fx
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
0
.
0
f
;
apos
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
tj
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
else
// bExclusion
{
// Read fixed atom data into registers and GRF
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
tj
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
!
(
excl
&
0x1
)
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
}
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4a
[
offset
]
=
of
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4a
[
offset
]
=
of
;
#else
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
of
.
x
=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
sA
[
threadIdx
.
x
].
fz
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
#endif
lasty
=
y
;
}
pos
++
;
}
}
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
0 → 100644
View file @
630bdd40
This diff is collapsed.
Click to expand it.
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for calculating Born sums. It is included
* several times in kCalculateObcGbsaBornSum.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
,
int
workUnits
)
{
extern
__shared__
Atom
sA
[];
int
end
=
workUnits
/
gridDim
.
x
;
int
pos
=
end
-
(
threadIdx
.
x
>>
GRIDBITS
)
-
1
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
#endif
while
(
pos
>=
0
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
+
(
blockIdx
.
x
*
workUnits
)
/
gridDim
.
x
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
dx
;
float
dy
;
float
dz
;
float
r2
;
float
r
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
// Local atom x, y, z, sum
float2
ar
=
cSim
.
pObcData
[
i
];
// Local atom vr, sr
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
r
=
ar
.
x
;
sA
[
threadIdx
.
x
].
sr
=
ar
.
y
;
apos
.
w
=
0
.
0
f
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#if defined USE_PERIODIC
if
(
i
<
cSim
.
atoms
&&
x
+
j
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
#endif
{
r
=
sqrt
(
r2
);
float
rInverse
=
1
.
0
f
/
r
;
float
rScaledRadiusJ
=
r
+
psA
[
j
].
sr
;
if
((
j
!=
tgx
)
&&
(
ar
.
x
<
rScaledRadiusJ
))
{
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
j
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
apos
.
w
+=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
j
].
sr
*
psA
[
j
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
if
(
ar
.
x
<
(
psA
[
j
].
r
-
r
))
{
apos
.
w
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
}
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
apos
.
w
;
#else
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
apos
.
w
;
#endif
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
int
j
=
y
+
tgx
;
unsigned
int
i
=
x
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pObcData
[
j
];
float4
apos
=
cSim
.
pPosq
[
i
];
// Local atom x, y, z, sum
float2
ar
=
cSim
.
pObcData
[
i
];
// Local atom vr, sr
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
sum
=
apos
.
w
=
0
.
0
f
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_PERIODIC
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
#endif
{
r
=
sqrt
(
r2
);
float
rInverse
=
1
.
0
f
/
r
;
float
rScaledRadiusJ
=
r
+
psA
[
tj
].
sr
;
if
(
ar
.
x
<
rScaledRadiusJ
)
{
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
tj
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
tj
].
sr
*
psA
[
tj
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
if
(
ar
.
x
<
(
psA
[
tj
].
sr
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
apos
.
w
+=
term
;
}
float
rScaledRadiusI
=
r
+
ar
.
y
;
if
(
psA
[
tj
].
r
<
rScaledRadiusI
)
{
float
l_ij
=
1
.
0
f
/
max
(
psA
[
tj
].
r
,
fabs
(
r
-
ar
.
y
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
ar
.
y
*
ar
.
y
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
if
(
psA
[
tj
].
r
<
(
ar
.
y
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
psA
[
tj
].
r
)
-
l_ij
);
}
psA
[
tj
].
sum
+=
term
;
}
}
tj
=
(
tj
-
1
)
&
(
GRID
-
1
);
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
apos
.
w
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
+=
sA
[
threadIdx
.
x
].
sum
;
#else
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
apos
.
w
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pBornSum
[
offset
]
=
sA
[
threadIdx
.
x
].
sum
;
#endif
}
pos
-=
cSim
.
nonbond_workBlock
;
}
}
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateObcGbsaForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
,
int
numWorkUnits
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
int
lasty
=
-
1
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pObcData
[
i
];
float
fb
=
cSim
.
pBornForce
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
float3
af
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
af
.
z
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
r
=
a
.
x
;
sA
[
threadIdx
.
x
].
sr
=
a
.
y
;
sA
[
threadIdx
.
x
].
sr2
=
a
.
y
*
a
.
y
;
sA
[
threadIdx
.
x
].
fb
=
fb
;
for
(
unsigned
int
j
=
(
tgx
+
1
)
&
(
GRID
-
1
);
j
!=
tgx
;
j
=
(
j
+
1
)
&
(
GRID
-
1
))
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
// Atom I Born forces and sum
float
rScaledRadiusJ
=
r
+
psA
[
j
].
sr
;
float
l_ij
=
1
.
0
f
/
max
(
a
.
x
,
fabs
(
r
-
psA
[
j
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
rInverse
=
1
.
0
f
/
r
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
r2Inverse
=
rInverse
*
rInverse
;
float
t1
=
log
(
u_ij
/
l_ij
);
float
t2
=
(
l_ij2
-
u_ij2
);
float
t3
=
t2
*
rInverse
;
t1
*=
rInverse
;
// Born Forces term
float
term
=
0
.
125
f
*
(
1
.
000
f
+
psA
[
j
].
sr2
*
r2Inverse
)
*
t3
+
0
.
250
f
*
t1
*
r2Inverse
;
float
dE
=
fb
*
term
;
#if defined USE_PERIODIC
if
(
a
.
x
>=
rScaledRadiusJ
||
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
a
.
x
>=
rScaledRadiusJ
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
a
.
x
>=
rScaledRadiusJ
)
#endif
{
dE
=
0
.
0
f
;
}
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
psA
[
j
].
fx
+=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
psA
[
j
].
fy
+=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
psA
[
j
].
fz
+=
d
;
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4b
[
offset
];
of
.
x
+=
af
.
x
+
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
af
.
y
+
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
af
.
z
+
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4b
[
offset
]
=
of
;
#else
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
of
.
x
=
af
.
x
+
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
af
.
y
+
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
af
.
z
+
sA
[
threadIdx
.
x
].
fz
;
of
.
w
=
0
.
0
f
;
cSim
.
pForce4b
[
offset
]
=
of
;
#endif
}
else
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pObcData
[
j
];
sA
[
threadIdx
.
x
].
fb
=
cSim
.
pBornForce
[
j
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
sr2
=
temp1
.
y
*
temp1
.
y
;
}
float
sr2
=
a
.
y
*
a
.
y
;
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
// Interleaved Atom I and J Born Forces and sum components
float
r2Inverse
=
1
.
0
f
/
r2
;
float
rScaledRadiusJ
=
r
+
psA
[
tj
].
sr
;
float
rScaledRadiusI
=
r
+
a
.
y
;
float
rInverse
=
1
.
0
f
/
r
;
float
l_ijJ
=
1
.
0
f
/
max
(
a
.
x
,
fabs
(
r
-
psA
[
tj
].
sr
));
float
l_ijI
=
1
.
0
f
/
max
(
psA
[
tj
].
r
,
fabs
(
r
-
a
.
y
));
float
u_ijJ
=
1
.
0
f
/
rScaledRadiusJ
;
float
u_ijI
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2J
=
l_ijJ
*
l_ijJ
;
float
l_ij2I
=
l_ijI
*
l_ijI
;
float
u_ij2J
=
u_ijJ
*
u_ijJ
;
float
u_ij2I
=
u_ijI
*
u_ijI
;
float
t1J
=
log
(
u_ijJ
/
l_ijJ
);
float
t1I
=
log
(
u_ijI
/
l_ijI
);
float
t2J
=
(
l_ij2J
-
u_ij2J
);
float
t2I
=
(
l_ij2I
-
u_ij2I
);
float
t3J
=
t2J
*
rInverse
;
float
t3I
=
t2I
*
rInverse
;
t1J
*=
rInverse
;
t1I
*=
rInverse
;
// Born Forces term
float
term
=
0
.
125
f
*
(
1
.
000
f
+
psA
[
tj
].
sr2
*
r2Inverse
)
*
t3J
+
0
.
250
f
*
t1J
*
r2Inverse
;
float
dE
=
fb
*
term
;
#if defined USE_PERIODIC
if
(
a
.
x
>=
rScaledRadiusJ
||
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
a
.
x
>=
rScaledRadiusJ
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
a
.
x
>=
rScaledRadiusJ
)
#endif
{
dE
=
0
.
0
f
;
}
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
psA
[
tj
].
fx
+=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
psA
[
tj
].
fy
+=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
psA
[
tj
].
fz
+=
d
;
// Atom J Born sum term
term
=
0
.
125
f
*
(
1
.
000
f
+
sr2
*
r2Inverse
)
*
t3I
+
0
.
250
f
*
t1I
*
r2Inverse
;
dE
=
psA
[
tj
].
fb
*
term
;
#ifdef USE_PERIODIC
if
(
psA
[
tj
].
r
>=
rScaledRadiusI
||
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
psA
[
tj
].
r
>=
rScaledRadiusI
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
psA
[
tj
].
r
>=
rScaledRadiusI
)
#endif
{
dE
=
0
.
0
f
;
}
dx
*=
dE
;
dy
*=
dE
;
dz
*=
dE
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4b
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4b
[
offset
]
=
of
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4b
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4b
[
offset
]
=
of
;
#else
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
cSim
.
pForce4b
[
offset
]
=
of
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
of
.
x
=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4b
[
offset
]
=
of
;
#endif
}
lasty
=
y
;
pos
++
;
}
}
platforms/cuda/src/kernels/kFindInteractingBlocks.h
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for identifying interacting blocks. It is included
* several times in kCalculateCDLJForces.cu with different #defines to generate
* different versions of the kernels.
*/
/**
* Find a bounding box for the atoms in each block.
*/
__global__
void
METHOD_NAME
(
kFindBlockBounds
,
_kernel
)()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
base
=
pos
<<
GRIDBITS
;
if
(
base
<
cSim
.
atoms
)
{
float4
apos
=
cSim
.
pPosq
[
base
];
#ifdef USE_PERIODIC
apos
.
x
-=
floor
(
apos
.
x
/
cSim
.
periodicBoxSizeX
)
*
cSim
.
periodicBoxSizeX
;
apos
.
y
-=
floor
(
apos
.
y
/
cSim
.
periodicBoxSizeY
)
*
cSim
.
periodicBoxSizeY
;
apos
.
z
-=
floor
(
apos
.
z
/
cSim
.
periodicBoxSizeZ
)
*
cSim
.
periodicBoxSizeZ
;
float4
firstPoint
=
apos
;
#endif
float
minx
=
apos
.
x
;
float
maxx
=
apos
.
x
;
float
miny
=
apos
.
y
;
float
maxy
=
apos
.
y
;
float
minz
=
apos
.
z
;
float
maxz
=
apos
.
z
;
for
(
unsigned
int
i
=
1
;
i
<
GRID
;
i
++
)
{
apos
=
cSim
.
pPosq
[
base
+
i
];
#ifdef USE_PERIODIC
apos
.
x
-=
floor
((
apos
.
x
-
firstPoint
.
x
)
/
cSim
.
periodicBoxSizeX
+
0
.
5
)
*
cSim
.
periodicBoxSizeX
;
apos
.
y
-=
floor
((
apos
.
y
-
firstPoint
.
y
)
/
cSim
.
periodicBoxSizeY
+
0
.
5
)
*
cSim
.
periodicBoxSizeY
;
apos
.
z
-=
floor
((
apos
.
z
-
firstPoint
.
z
)
/
cSim
.
periodicBoxSizeZ
+
0
.
5
)
*
cSim
.
periodicBoxSizeZ
;
#endif
minx
=
min
(
minx
,
apos
.
x
);
maxx
=
max
(
maxx
,
apos
.
x
);
miny
=
min
(
miny
,
apos
.
y
);
maxy
=
max
(
maxy
,
apos
.
y
);
minz
=
min
(
minz
,
apos
.
z
);
maxz
=
max
(
maxz
,
apos
.
z
);
}
cSim
.
pGridBoundingBox
[
pos
]
=
make_float4
(
0
.
5
f
*
(
maxx
-
minx
),
0
.
5
f
*
(
maxy
-
miny
),
0
.
5
f
*
(
maxz
-
minz
),
0
);
cSim
.
pGridCenter
[
pos
]
=
make_float4
(
0
.
5
f
*
(
maxx
+
minx
),
0
.
5
f
*
(
maxy
+
miny
),
0
.
5
f
*
(
maxz
+
minz
),
0
);
}
}
/**
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting.
*/
__global__
void
METHOD_NAME
(
kFindBlocksWithInteractions
,
_kernel
)()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
pos
<
cSim
.
workUnits
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
cSim
.
pWorkUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
);
x
=
(
x
>>
17
);
// Find the distance between the bounding boxes of the two cells.
float4
centera
=
cSim
.
pGridCenter
[
x
];
float4
centerb
=
cSim
.
pGridCenter
[
y
];
float
dx
=
centera
.
x
-
centerb
.
x
;
float
dy
=
centera
.
y
-
centerb
.
y
;
float
dz
=
centera
.
z
-
centerb
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float4
boxSizea
=
cSim
.
pGridBoundingBox
[
x
];
float4
boxSizeb
=
cSim
.
pGridBoundingBox
[
y
];
dx
=
max
(
0
.
0
f
,
abs
(
dx
)
-
boxSizea
.
x
-
boxSizeb
.
x
);
dy
=
max
(
0
.
0
f
,
abs
(
dy
)
-
boxSizea
.
y
-
boxSizeb
.
y
);
dz
=
max
(
0
.
0
f
,
abs
(
dz
)
-
boxSizea
.
z
-
boxSizeb
.
z
);
cSim
.
pInteractionFlag
[
pos
]
=
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cSim
.
nonbondedCutoffSqr
?
0
:
1
);
}
}
platforms/cuda/src/kernels/kSettle.cu
0 → 100644
View file @
630bdd40
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
//#include <fstream>
using
namespace
std
;
#define DeltaShake
#include "gputypes.h"
static
__constant__
cudaGmxSimulation
cSim
;
void
SetSettleSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetSettleSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
/**
* This is based on the setlep FORTRAN routine by Shuichi Miyamoto. See
* S. Miyamoto and P. Kollman, J. Comp. Chem., vol 13, no. 8, pp. 952-962 (1992).
*/
__global__
void
kApplyFirstSettle_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
settleConstraints
)
{
int4
atomID
=
cSim
.
pSettleID
[
pos
];
float2
params
=
cSim
.
pSettleParameter
[
pos
];
float4
apos0
=
cSim
.
pOldPosq
[
atomID
.
x
];
float4
xp0
=
cSim
.
pPosqP
[
atomID
.
x
];
float4
apos1
=
cSim
.
pOldPosq
[
atomID
.
y
];
float4
xp1
=
cSim
.
pPosqP
[
atomID
.
y
];
float4
apos2
=
cSim
.
pOldPosq
[
atomID
.
z
];
float4
xp2
=
cSim
.
pPosqP
[
atomID
.
z
];
float
m0
=
1.0
/
cSim
.
pVelm4
[
atomID
.
x
].
w
;
float
m1
=
1.0
/
cSim
.
pVelm4
[
atomID
.
y
].
w
;
float
m2
=
1.0
/
cSim
.
pVelm4
[
atomID
.
z
].
w
;
float
xb0
=
apos1
.
x
-
apos0
.
x
;
float
yb0
=
apos1
.
y
-
apos0
.
y
;
float
zb0
=
apos1
.
z
-
apos0
.
z
;
float
xc0
=
apos2
.
x
-
apos0
.
x
;
float
yc0
=
apos2
.
y
-
apos0
.
y
;
float
zc0
=
apos2
.
z
-
apos0
.
z
;
float
dist1
=
sqrt
(
xb0
*
xb0
+
yb0
*
yb0
+
zb0
*
zb0
);
float
dist2
=
sqrt
(
xc0
*
xc0
+
yc0
*
yc0
+
zc0
*
zc0
);
float
totalMass
=
m0
+
m1
+
m2
;
float
xcom
=
((
apos0
.
x
+
xp0
.
x
)
*
m0
+
(
apos1
.
x
+
xp1
.
x
)
*
m1
+
(
apos2
.
x
+
xp2
.
x
)
*
m2
)
/
totalMass
;
float
ycom
=
((
apos0
.
y
+
xp0
.
y
)
*
m0
+
(
apos1
.
y
+
xp1
.
y
)
*
m1
+
(
apos2
.
y
+
xp2
.
y
)
*
m2
)
/
totalMass
;
float
zcom
=
((
apos0
.
z
+
xp0
.
z
)
*
m0
+
(
apos1
.
z
+
xp1
.
z
)
*
m1
+
(
apos2
.
z
+
xp2
.
z
)
*
m2
)
/
totalMass
;
float
xa1
=
apos0
.
x
+
xp0
.
x
-
xcom
;
float
ya1
=
apos0
.
y
+
xp0
.
y
-
ycom
;
float
za1
=
apos0
.
z
+
xp0
.
z
-
zcom
;
float
xb1
=
apos1
.
x
+
xp1
.
x
-
xcom
;
float
yb1
=
apos1
.
y
+
xp1
.
y
-
ycom
;
float
zb1
=
apos1
.
z
+
xp1
.
z
-
zcom
;
float
xc1
=
apos2
.
x
+
xp2
.
x
-
xcom
;
float
yc1
=
apos2
.
y
+
xp2
.
y
-
ycom
;
float
zc1
=
apos2
.
z
+
xp2
.
z
-
zcom
;
float
xaksZd
=
yb0
*
zc0
-
zb0
*
yc0
;
float
yaksZd
=
zb0
*
xc0
-
xb0
*
zc0
;
float
zaksZd
=
xb0
*
yc0
-
yb0
*
xc0
;
float
xaksXd
=
ya1
*
zaksZd
-
za1
*
yaksZd
;
float
yaksXd
=
za1
*
xaksZd
-
xa1
*
zaksZd
;
float
zaksXd
=
xa1
*
yaksZd
-
ya1
*
xaksZd
;
float
xaksYd
=
yaksZd
*
zaksXd
-
zaksZd
*
yaksXd
;
float
yaksYd
=
zaksZd
*
xaksXd
-
xaksZd
*
zaksXd
;
float
zaksYd
=
xaksZd
*
yaksXd
-
yaksZd
*
xaksXd
;
float
axlng
=
sqrt
(
xaksXd
*
xaksXd
+
yaksXd
*
yaksXd
+
zaksXd
*
zaksXd
);
float
aylng
=
sqrt
(
xaksYd
*
xaksYd
+
yaksYd
*
yaksYd
+
zaksYd
*
zaksYd
);
float
azlng
=
sqrt
(
xaksZd
*
xaksZd
+
yaksZd
*
yaksZd
+
zaksZd
*
zaksZd
);
float
trns11
=
xaksXd
/
axlng
;
float
trns21
=
yaksXd
/
axlng
;
float
trns31
=
zaksXd
/
axlng
;
float
trns12
=
xaksYd
/
aylng
;
float
trns22
=
yaksYd
/
aylng
;
float
trns32
=
zaksYd
/
aylng
;
float
trns13
=
xaksZd
/
azlng
;
float
trns23
=
yaksZd
/
azlng
;
float
trns33
=
zaksZd
/
azlng
;
float
xb0d
=
trns11
*
xb0
+
trns21
*
yb0
+
trns31
*
zb0
;
float
yb0d
=
trns12
*
xb0
+
trns22
*
yb0
+
trns32
*
zb0
;
float
xc0d
=
trns11
*
xc0
+
trns21
*
yc0
+
trns31
*
zc0
;
float
yc0d
=
trns12
*
xc0
+
trns22
*
yc0
+
trns32
*
zc0
;
// float xa1d = trns11*xa1 + trns21*ya1 + trns31*za1;
// float ya1d = trns12*xa1 + trns22*ya1 + trns32*za1;
float
za1d
=
trns13
*
xa1
+
trns23
*
ya1
+
trns33
*
za1
;
float
xb1d
=
trns11
*
xb1
+
trns21
*
yb1
+
trns31
*
zb1
;
float
yb1d
=
trns12
*
xb1
+
trns22
*
yb1
+
trns32
*
zb1
;
float
zb1d
=
trns13
*
xb1
+
trns23
*
yb1
+
trns33
*
zb1
;
float
xc1d
=
trns11
*
xc1
+
trns21
*
yc1
+
trns31
*
zc1
;
float
yc1d
=
trns12
*
xc1
+
trns22
*
yc1
+
trns32
*
zc1
;
float
zc1d
=
trns13
*
xc1
+
trns23
*
yc1
+
trns33
*
zc1
;
// --- Step2 A2' ---
float
rc
=
0.5
*
params
.
y
;
float
rb
=
sqrt
(
params
.
x
*
params
.
x
-
rc
*
rc
);
float
ra
=
rb
*
(
m1
+
m2
)
/
totalMass
;
rb
-=
ra
;
float
sinphi
=
za1d
/
ra
;
float
cosphi
=
sqrt
(
1.0
-
sinphi
*
sinphi
);
float
sinpsi
=
(
zb1d
-
zc1d
)
/
(
2
*
rc
*
cosphi
);
float
cospsi
=
sqrt
(
1.0
-
sinpsi
*
sinpsi
);
float
ya2d
=
ra
*
cosphi
;
float
xb2d
=
-
rc
*
cospsi
;
float
yb2d
=
-
rb
*
cosphi
-
rc
*
sinpsi
*
sinphi
;
float
yc2d
=
-
rb
*
cosphi
+
rc
*
sinpsi
*
sinphi
;
float
xb2d2
=
xb2d
*
xb2d
;
float
hh2
=
4.0
*
xb2d2
+
(
yb2d
-
yc2d
)
*
(
yb2d
-
yc2d
)
+
(
zb1d
-
zc1d
)
*
(
zb1d
-
zc1d
);
float
deltx
=
2.0
*
xb2d
+
sqrt
(
4.0
*
xb2d2
-
hh2
+
params
.
y
*
params
.
y
);
xb2d
-=
deltx
*
0.5
;
// --- Step3 al,be,ga ---
float
alpa
=
(
xb2d
*
(
xb0d
-
xc0d
)
+
yb0d
*
yb2d
+
yc0d
*
yc2d
);
float
beta
=
(
xb2d
*
(
yc0d
-
yb0d
)
+
xb0d
*
yb2d
+
xc0d
*
yc2d
);
float
gama
=
xb0d
*
yb1d
-
xb1d
*
yb0d
+
xc0d
*
yc1d
-
xc1d
*
yc0d
;
float
al2be2
=
alpa
*
alpa
+
beta
*
beta
;
float
sinthe
=
(
alpa
*
gama
-
beta
*
sqrt
(
al2be2
-
gama
*
gama
)
)
/
al2be2
;
// --- Step4 A3' ---
float
costhe
=
sqrt
(
1.0
-
sinthe
*
sinthe
);
float
xa3d
=
-
ya2d
*
sinthe
;
float
ya3d
=
ya2d
*
costhe
;
float
za3d
=
za1d
;
float
xb3d
=
xb2d
*
costhe
-
yb2d
*
sinthe
;
float
yb3d
=
xb2d
*
sinthe
+
yb2d
*
costhe
;
float
zb3d
=
zb1d
;
float
xc3d
=
-
xb2d
*
costhe
-
yc2d
*
sinthe
;
float
yc3d
=
-
xb2d
*
sinthe
+
yc2d
*
costhe
;
float
zc3d
=
zc1d
;
// --- Step5 A3 ---
float
xa3
=
trns11
*
xa3d
+
trns12
*
ya3d
+
trns13
*
za3d
;
float
ya3
=
trns21
*
xa3d
+
trns22
*
ya3d
+
trns23
*
za3d
;
float
za3
=
trns31
*
xa3d
+
trns32
*
ya3d
+
trns33
*
za3d
;
float
xb3
=
trns11
*
xb3d
+
trns12
*
yb3d
+
trns13
*
zb3d
;
float
yb3
=
trns21
*
xb3d
+
trns22
*
yb3d
+
trns23
*
zb3d
;
float
zb3
=
trns31
*
xb3d
+
trns32
*
yb3d
+
trns33
*
zb3d
;
float
xc3
=
trns11
*
xc3d
+
trns12
*
yc3d
+
trns13
*
zc3d
;
float
yc3
=
trns21
*
xc3d
+
trns22
*
yc3d
+
trns23
*
zc3d
;
float
zc3
=
trns31
*
xc3d
+
trns32
*
yc3d
+
trns33
*
zc3d
;
xp0
.
x
=
xcom
+
xa3
-
apos0
.
x
;
xp0
.
y
=
ycom
+
ya3
-
apos0
.
y
;
xp0
.
z
=
zcom
+
za3
-
apos0
.
z
;
xp1
.
x
=
xcom
+
xb3
-
apos1
.
x
;
xp1
.
y
=
ycom
+
yb3
-
apos1
.
y
;
xp1
.
z
=
zcom
+
zb3
-
apos1
.
z
;
xp2
.
x
=
xcom
+
xc3
-
apos2
.
x
;
xp2
.
y
=
ycom
+
yc3
-
apos2
.
y
;
xp2
.
z
=
zcom
+
zc3
-
apos2
.
z
;
cSim
.
pPosqP
[
atomID
.
x
]
=
xp0
;
cSim
.
pPosqP
[
atomID
.
y
]
=
xp1
;
cSim
.
pPosqP
[
atomID
.
z
]
=
xp2
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
void
kApplyFirstSettle
(
gpuContext
gpu
)
{
// printf("kApplyFirstSettle\n");
if
(
gpu
->
sim
.
settleConstraints
>
0
)
{
kApplyFirstSettle_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
settle_threads_per_block
>>>
();
LAUNCHERROR
(
"kApplyFirstSettle"
);
}
}
__global__
void
kApplySecondSettle_kernel
()
{
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
settleConstraints
)
{
int4
atomID
=
cSim
.
pSettleID
[
pos
];
float2
params
=
cSim
.
pSettleParameter
[
pos
];
float4
apos0
=
cSim
.
pOldPosq
[
atomID
.
x
];
float4
xp0
=
cSim
.
pPosq
[
atomID
.
x
];
float4
apos1
=
cSim
.
pOldPosq
[
atomID
.
y
];
float4
xp1
=
cSim
.
pPosq
[
atomID
.
y
];
float4
apos2
=
cSim
.
pOldPosq
[
atomID
.
z
];
float4
xp2
=
cSim
.
pPosq
[
atomID
.
z
];
float
m0
=
1.0
/
cSim
.
pVelm4
[
atomID
.
x
].
w
;
float
m1
=
1.0
/
cSim
.
pVelm4
[
atomID
.
y
].
w
;
float
m2
=
1.0
/
cSim
.
pVelm4
[
atomID
.
z
].
w
;
float
xb0
=
apos1
.
x
-
apos0
.
x
;
float
yb0
=
apos1
.
y
-
apos0
.
y
;
float
zb0
=
apos1
.
z
-
apos0
.
z
;
float
xc0
=
apos2
.
x
-
apos0
.
x
;
float
yc0
=
apos2
.
y
-
apos0
.
y
;
float
zc0
=
apos2
.
z
-
apos0
.
z
;
float
dist1
=
sqrt
(
xb0
*
xb0
+
yb0
*
yb0
+
zb0
*
zb0
);
float
dist2
=
sqrt
(
xc0
*
xc0
+
yc0
*
yc0
+
zc0
*
zc0
);
float
totalMass
=
m0
+
m1
+
m2
;
float
xcom
=
((
apos0
.
x
+
xp0
.
x
)
*
m0
+
(
apos1
.
x
+
xp1
.
x
)
*
m1
+
(
apos2
.
x
+
xp2
.
x
)
*
m2
)
/
totalMass
;
float
ycom
=
((
apos0
.
y
+
xp0
.
y
)
*
m0
+
(
apos1
.
y
+
xp1
.
y
)
*
m1
+
(
apos2
.
y
+
xp2
.
y
)
*
m2
)
/
totalMass
;
float
zcom
=
((
apos0
.
z
+
xp0
.
z
)
*
m0
+
(
apos1
.
z
+
xp1
.
z
)
*
m1
+
(
apos2
.
z
+
xp2
.
z
)
*
m2
)
/
totalMass
;
float
xa1
=
apos0
.
x
+
xp0
.
x
-
xcom
;
float
ya1
=
apos0
.
y
+
xp0
.
y
-
ycom
;
float
za1
=
apos0
.
z
+
xp0
.
z
-
zcom
;
float
xb1
=
apos1
.
x
+
xp1
.
x
-
xcom
;
float
yb1
=
apos1
.
y
+
xp1
.
y
-
ycom
;
float
zb1
=
apos1
.
z
+
xp1
.
z
-
zcom
;
float
xc1
=
apos2
.
x
+
xp2
.
x
-
xcom
;
float
yc1
=
apos2
.
y
+
xp2
.
y
-
ycom
;
float
zc1
=
apos2
.
z
+
xp2
.
z
-
zcom
;
float
xaksZd
=
yb0
*
zc0
-
zb0
*
yc0
;
float
yaksZd
=
zb0
*
xc0
-
xb0
*
zc0
;
float
zaksZd
=
xb0
*
yc0
-
yb0
*
xc0
;
float
xaksXd
=
ya1
*
zaksZd
-
za1
*
yaksZd
;
float
yaksXd
=
za1
*
xaksZd
-
xa1
*
zaksZd
;
float
zaksXd
=
xa1
*
yaksZd
-
ya1
*
xaksZd
;
float
xaksYd
=
yaksZd
*
zaksXd
-
zaksZd
*
yaksXd
;
float
yaksYd
=
zaksZd
*
xaksXd
-
xaksZd
*
zaksXd
;
float
zaksYd
=
xaksZd
*
yaksXd
-
yaksZd
*
xaksXd
;
float
axlng
=
sqrt
(
xaksXd
*
xaksXd
+
yaksXd
*
yaksXd
+
zaksXd
*
zaksXd
);
float
aylng
=
sqrt
(
xaksYd
*
xaksYd
+
yaksYd
*
yaksYd
+
zaksYd
*
zaksYd
);
float
azlng
=
sqrt
(
xaksZd
*
xaksZd
+
yaksZd
*
yaksZd
+
zaksZd
*
zaksZd
);
float
trns11
=
xaksXd
/
axlng
;
float
trns21
=
yaksXd
/
axlng
;
float
trns31
=
zaksXd
/
axlng
;
float
trns12
=
xaksYd
/
aylng
;
float
trns22
=
yaksYd
/
aylng
;
float
trns32
=
zaksYd
/
aylng
;
float
trns13
=
xaksZd
/
azlng
;
float
trns23
=
yaksZd
/
azlng
;
float
trns33
=
zaksZd
/
azlng
;
float
xb0d
=
trns11
*
xb0
+
trns21
*
yb0
+
trns31
*
zb0
;
float
yb0d
=
trns12
*
xb0
+
trns22
*
yb0
+
trns32
*
zb0
;
float
xc0d
=
trns11
*
xc0
+
trns21
*
yc0
+
trns31
*
zc0
;
float
yc0d
=
trns12
*
xc0
+
trns22
*
yc0
+
trns32
*
zc0
;
// float xa1d = trns11*xa1 + trns21*ya1 + trns31*za1;
// float ya1d = trns12*xa1 + trns22*ya1 + trns32*za1;
float
za1d
=
trns13
*
xa1
+
trns23
*
ya1
+
trns33
*
za1
;
float
xb1d
=
trns11
*
xb1
+
trns21
*
yb1
+
trns31
*
zb1
;
float
yb1d
=
trns12
*
xb1
+
trns22
*
yb1
+
trns32
*
zb1
;
float
zb1d
=
trns13
*
xb1
+
trns23
*
yb1
+
trns33
*
zb1
;
float
xc1d
=
trns11
*
xc1
+
trns21
*
yc1
+
trns31
*
zc1
;
float
yc1d
=
trns12
*
xc1
+
trns22
*
yc1
+
trns32
*
zc1
;
float
zc1d
=
trns13
*
xc1
+
trns23
*
yc1
+
trns33
*
zc1
;
// --- Step2 A2' ---
float
rc
=
0.5
*
params
.
y
;
float
rb
=
sqrt
(
params
.
x
*
params
.
x
-
rc
*
rc
);
float
ra
=
rb
*
(
m1
+
m2
)
/
totalMass
;
rb
-=
ra
;
float
sinphi
=
za1d
/
ra
;
float
cosphi
=
sqrt
(
1.0
-
sinphi
*
sinphi
);
float
sinpsi
=
(
zb1d
-
zc1d
)
/
(
2
*
rc
*
cosphi
);
float
cospsi
=
sqrt
(
1.0
-
sinpsi
*
sinpsi
);
float
ya2d
=
ra
*
cosphi
;
float
xb2d
=
-
rc
*
cospsi
;
float
yb2d
=
-
rb
*
cosphi
-
rc
*
sinpsi
*
sinphi
;
float
yc2d
=
-
rb
*
cosphi
+
rc
*
sinpsi
*
sinphi
;
float
xb2d2
=
xb2d
*
xb2d
;
float
hh2
=
4.0
*
xb2d2
+
(
yb2d
-
yc2d
)
*
(
yb2d
-
yc2d
)
+
(
zb1d
-
zc1d
)
*
(
zb1d
-
zc1d
);
float
deltx
=
2.0
*
xb2d
+
sqrt
(
4.0
*
xb2d2
-
hh2
+
params
.
y
*
params
.
y
);
xb2d
-=
deltx
*
0.5
;
// --- Step3 al,be,ga ---
float
alpa
=
(
xb2d
*
(
xb0d
-
xc0d
)
+
yb0d
*
yb2d
+
yc0d
*
yc2d
);
float
beta
=
(
xb2d
*
(
yc0d
-
yb0d
)
+
xb0d
*
yb2d
+
xc0d
*
yc2d
);
float
gama
=
xb0d
*
yb1d
-
xb1d
*
yb0d
+
xc0d
*
yc1d
-
xc1d
*
yc0d
;
float
al2be2
=
alpa
*
alpa
+
beta
*
beta
;
float
sinthe
=
(
alpa
*
gama
-
beta
*
sqrt
(
al2be2
-
gama
*
gama
)
)
/
al2be2
;
// --- Step4 A3' ---
float
costhe
=
sqrt
(
1.0
-
sinthe
*
sinthe
);
float
xa3d
=
-
ya2d
*
sinthe
;
float
ya3d
=
ya2d
*
costhe
;
float
za3d
=
za1d
;
float
xb3d
=
xb2d
*
costhe
-
yb2d
*
sinthe
;
float
yb3d
=
xb2d
*
sinthe
+
yb2d
*
costhe
;
float
zb3d
=
zb1d
;
float
xc3d
=
-
xb2d
*
costhe
-
yc2d
*
sinthe
;
float
yc3d
=
-
xb2d
*
sinthe
+
yc2d
*
costhe
;
float
zc3d
=
zc1d
;
// --- Step5 A3 ---
float
xa3
=
trns11
*
xa3d
+
trns12
*
ya3d
+
trns13
*
za3d
;
float
ya3
=
trns21
*
xa3d
+
trns22
*
ya3d
+
trns23
*
za3d
;
float
za3
=
trns31
*
xa3d
+
trns32
*
ya3d
+
trns33
*
za3d
;
float
xb3
=
trns11
*
xb3d
+
trns12
*
yb3d
+
trns13
*
zb3d
;
float
yb3
=
trns21
*
xb3d
+
trns22
*
yb3d
+
trns23
*
zb3d
;
float
zb3
=
trns31
*
xb3d
+
trns32
*
yb3d
+
trns33
*
zb3d
;
float
xc3
=
trns11
*
xc3d
+
trns12
*
yc3d
+
trns13
*
zc3d
;
float
yc3
=
trns21
*
xc3d
+
trns22
*
yc3d
+
trns23
*
zc3d
;
float
zc3
=
trns31
*
xc3d
+
trns32
*
yc3d
+
trns33
*
zc3d
;
xp0
.
x
=
xcom
+
xa3
;
xp0
.
y
=
ycom
+
ya3
;
xp0
.
z
=
zcom
+
za3
;
xp1
.
x
=
xcom
+
xb3
;
xp1
.
y
=
ycom
+
yb3
;
xp1
.
z
=
zcom
+
zb3
;
xp2
.
x
=
xcom
+
xc3
;
xp2
.
y
=
ycom
+
yc3
;
xp2
.
z
=
zcom
+
zc3
;
cSim
.
pPosq
[
atomID
.
x
]
=
xp0
;
cSim
.
pPosq
[
atomID
.
y
]
=
xp1
;
cSim
.
pPosq
[
atomID
.
z
]
=
xp2
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
void
kApplySecondSettle
(
gpuContext
gpu
)
{
// printf("kApplySecondSettle\n");
if
(
gpu
->
sim
.
settleConstraints
>
0
)
{
kApplySecondSettle_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
settle_threads_per_block
>>>
();
LAUNCHERROR
(
"kApplySecondSettle"
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment