Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8055a541
Commit
8055a541
authored
Feb 27, 2009
by
Peter Eastman
Browse files
Reduced memory use for exclusions. Also deleted some obsolete files.
parent
b98859ec
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
85 additions
and
1157 deletions
+85
-1157
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+1
-1
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+67
-17
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+1
-0
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+8
-3
platforms/cuda/src/kernels/kCalculateCDLJForces_12.cu
platforms/cuda/src/kernels/kCalculateCDLJForces_12.cu
+0
-375
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+8
-3
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1_12.cu
...forms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1_12.cu
+0
-422
platforms/cuda/src/kernels/kCalculateObcGbsaForces2_12.cu
platforms/cuda/src/kernels/kCalculateObcGbsaForces2_12.cu
+0
-336
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
8055a541
...
...
@@ -277,7 +277,6 @@ struct cudaGmxSimulation {
unsigned
int
stride2
;
// Atomic attributes stride x 2
unsigned
int
stride3
;
// Atomic attributes stride x 3
unsigned
int
stride4
;
// Atomic attributes stride x 4
unsigned
int
exclusionStride
;
// Exclusion list stride = stride / GRID
unsigned
int
nonbondOutputBuffers
;
// Nonbond output buffers per nonbond call
unsigned
int
totalNonbondOutputBuffers
;
// Total nonbond output buffers
unsigned
int
outputBuffers
;
// Number of output buffers
...
...
@@ -357,6 +356,7 @@ struct cudaGmxSimulation {
int4
*
pSettleID
;
// Settle atoms
float2
*
pSettleParameter
;
// Settle parameters
unsigned
int
*
pExclusion
;
// Nonbond exclusion data
unsigned
int
*
pExclusionIndex
;
// Index of exclusion data for each work unit
unsigned
int
bond_offset
;
// Offset to end of bonds
unsigned
int
bond_angle_offset
;
// Offset to end of bond angles
unsigned
int
dihedral_offset
;
// Offset to end of dihedrals
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
8055a541
...
...
@@ -1172,7 +1172,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu
->
sim
.
stride2
=
2
*
gpu
->
sim
.
stride
;
gpu
->
sim
.
stride3
=
3
*
gpu
->
sim
.
stride
;
gpu
->
sim
.
stride4
=
4
*
gpu
->
sim
.
stride
;
gpu
->
sim
.
exclusionStride
=
gpu
->
sim
.
stride
/
GRID
;
gpu
->
psPosqP4
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
);
gpu
->
sim
.
pPosqP
=
gpu
->
psPosqP4
->
_pDevStream
[
0
];
gpu
->
psOldPosq4
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
);
...
...
@@ -1533,6 +1532,7 @@ void* gpuInit(int numAtoms)
gpu
->
psSettleID
=
NULL
;
gpu
->
psSettleParameter
=
NULL
;
gpu
->
psExclusion
=
NULL
;
gpu
->
psExclusionIndex
=
NULL
;
gpu
->
psWorkUnit
=
NULL
;
gpu
->
psInteractingWorkUnit
=
NULL
;
gpu
->
psInteractionFlag
=
NULL
;
...
...
@@ -1665,6 +1665,7 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psSettleID
;
delete
gpu
->
psSettleParameter
;
delete
gpu
->
psExclusion
;
delete
gpu
->
psExclusionIndex
;
delete
gpu
->
psWorkUnit
;
delete
gpu
->
psInteractingWorkUnit
;
delete
gpu
->
psInteractionFlag
;
...
...
@@ -1871,31 +1872,74 @@ void gpuBuildExclusionList(gpuContext gpu)
{
const
unsigned
int
atoms
=
gpu
->
sim
.
paddedNumberOfAtoms
;
const
unsigned
int
grid
=
gpu
->
grid
;
const
unsigned
int
dim
=
(
atoms
+
(
grid
-
1
))
/
grid
;
CUDAStream
<
unsigned
int
>*
psExclusion
=
new
CUDAStream
<
unsigned
int
>
((
atoms
*
atoms
+
grid
-
1
)
/
grid
,
1u
);
gpu
->
psExclusion
=
psExclusion
;
gpu
->
sim
.
pExclusion
=
psExclusion
->
_pDevStream
[
0
];
unsigned
int
*
pExList
=
psExclusion
->
_pSysStream
[
0
];
const
unsigned
int
dim
=
atoms
/
grid
;
unsigned
int
*
pWorkList
=
gpu
->
psWorkUnit
->
_pSysStream
[
0
];
for
(
int
i
=
0
;
i
<
psExclusion
->
_length
;
++
i
)
pExList
[
i
]
=
0xFFFFFFFF
;
//
Fill in th
e exclusions.
//
Mark which work units hav
e exclusions.
for
(
int
atom1
=
0
;
atom1
<
gpu
->
exclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
grid
;
int
offset
=
atom1
-
x
*
grid
;
for
(
int
j
=
0
;
j
<
gpu
->
exclusions
[
atom1
].
size
();
++
j
)
{
int
atom2
=
gpu
->
exclusions
[
atom1
][
j
];
int
y
=
atom2
/
grid
;
int
index
=
x
*
atoms
+
y
*
grid
+
offset
;
pExList
[
index
]
&=
0xFFFFFFFF
-
(
1
<<
(
atom2
-
y
*
grid
));
int
cell
=
(
x
>
y
?
x
+
y
*
dim
-
y
*
(
y
+
1
)
/
2
:
y
+
x
*
dim
-
x
*
(
x
+
1
)
/
2
);
pWorkList
[
cell
]
|=
1
;
}
}
if
(
gpu
->
sim
.
paddedNumberOfAtoms
>
gpu
->
natoms
)
{
int
lastBlock
=
gpu
->
natoms
/
grid
;
for
(
int
i
=
0
;
i
<
gpu
->
sim
.
workUnits
;
++
i
)
{
int
x
=
pWorkList
[
i
]
>>
17
;
int
y
=
(
pWorkList
[
i
]
>>
2
)
&
0x7FFF
;
if
(
x
==
lastBlock
||
y
==
lastBlock
)
pWorkList
[
i
]
|=
1
;
}
}
// Build a list of indexes for the work units with exclusions.
CUDAStream
<
unsigned
int
>*
psExclusionIndex
=
new
CUDAStream
<
unsigned
int
>
(
gpu
->
sim
.
workUnits
,
1u
);
gpu
->
psExclusionIndex
=
psExclusionIndex
;
unsigned
int
*
pExclusionIndex
=
psExclusionIndex
->
_pSysData
;
gpu
->
sim
.
pExclusionIndex
=
psExclusionIndex
->
_pDevData
;
int
numWithExclusions
=
0
;
for
(
int
i
=
0
;
i
<
psExclusionIndex
->
_length
;
++
i
)
if
((
pWorkList
[
i
]
&
1
)
==
1
)
pExclusionIndex
[
i
]
=
(
numWithExclusions
++
)
*
grid
;
// Record the exclusion data.
CUDAStream
<
unsigned
int
>*
psExclusion
=
new
CUDAStream
<
unsigned
int
>
(
numWithExclusions
*
grid
,
1u
);
gpu
->
psExclusion
=
psExclusion
;
unsigned
int
*
pExclusion
=
psExclusion
->
_pSysData
;
gpu
->
sim
.
pExclusion
=
psExclusion
->
_pDevData
;
for
(
int
i
=
0
;
i
<
psExclusion
->
_length
;
++
i
)
pExclusion
[
i
]
=
0xFFFFFFFF
;
for
(
int
atom1
=
0
;
atom1
<
gpu
->
exclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
grid
;
int
offset1
=
atom1
-
x
*
grid
;
for
(
int
j
=
0
;
j
<
gpu
->
exclusions
[
atom1
].
size
();
++
j
)
{
int
atom2
=
gpu
->
exclusions
[
atom1
][
j
];
int
y
=
atom2
/
grid
;
int
offset2
=
atom2
-
y
*
grid
;
if
(
x
>
y
)
{
int
cell
=
x
+
y
*
dim
-
y
*
(
y
+
1
)
/
2
;
pExclusion
[
pExclusionIndex
[
cell
]
+
offset1
]
&=
0xFFFFFFFF
-
(
1
<<
offset2
);
}
else
{
int
cell
=
y
+
x
*
dim
-
x
*
(
x
+
1
)
/
2
;
pExclusion
[
pExclusionIndex
[
cell
]
+
offset2
]
&=
0xFFFFFFFF
-
(
1
<<
offset1
);
}
}
}
// Mark all interactions that involve a padding atom as being excluded.
...
...
@@ -1907,16 +1951,22 @@ void gpuBuildExclusionList(gpuContext gpu)
{
int
y
=
atom2
/
grid
;
int
index
=
x
*
atoms
+
y
*
grid
+
offset1
;
pExList
[
index
]
&=
0xFFFFFFFF
-
(
1
<<
(
atom2
-
y
*
grid
));
int
offset2
=
atom2
-
y
*
grid
;
index
=
y
*
atoms
+
x
*
grid
+
offset2
;
pExList
[
index
]
&=
0xFFFFFFFF
-
(
1
<<
(
atom1
-
x
*
grid
));
int
cell
=
(
x
>
y
?
x
+
y
*
dim
-
y
*
(
y
+
1
)
/
2
:
y
+
x
*
dim
-
x
*
(
x
+
1
)
/
2
);
pWorkList
[
cell
]
|=
1
;
if
(
x
>=
y
)
{
int
cell
=
x
+
y
*
dim
-
y
*
(
y
+
1
)
/
2
;
pExclusion
[
pExclusionIndex
[
cell
]
+
offset1
]
&=
0xFFFFFFFF
-
(
1
<<
offset2
);
}
if
(
y
>=
x
)
{
int
cell
=
y
+
x
*
dim
-
x
*
(
x
+
1
)
/
2
;
pExclusion
[
pExclusionIndex
[
cell
]
+
offset2
]
&=
0xFFFFFFFF
-
(
1
<<
offset1
);
}
}
}
psExclusion
->
Upload
();
psExclusionIndex
->
Upload
();
gpu
->
psWorkUnit
->
Upload
();
gpuSetConstants
(
gpu
);
}
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
8055a541
...
...
@@ -116,6 +116,7 @@ struct _gpuContext {
CUDAStream
<
int4
>*
psSettleID
;
CUDAStream
<
float2
>*
psSettleParameter
;
CUDAStream
<
unsigned
int
>*
psExclusion
;
CUDAStream
<
unsigned
int
>*
psExclusionIndex
;
CUDAStream
<
unsigned
int
>*
psWorkUnit
;
CUDAStream
<
unsigned
int
>*
psInteractingWorkUnit
;
CUDAStream
<
unsigned
int
>*
psInteractionFlag
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.h
View file @
8055a541
...
...
@@ -126,7 +126,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
}
else
// bExclusion
{
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
unsigned
int
xi
=
x
>>
GRIDBITS
;
int
cell
=
xi
+
xi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
xi
*
(
xi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
...
...
@@ -253,8 +255,11 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
else
// bExclusion
{
// Read fixed atom data into registers and GRF
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
yi
=
y
>>
GRIDBITS
;
int
cell
=
xi
+
yi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
yi
*
(
yi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces_12.cu
deleted
100755 → 0
View file @
b98859ec
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
#include "gputypes.h"
#include "cudatypes.h"
#define UNROLLXX 0
#define UNROLLXY 0
// Shared-memory record for one atom as used by the CDLJ force kernel.
// Layout is nine consecutive floats; the kernel fills the first six from
// pPosq/pAttr and accumulates into the last three.
struct Atom {
    float x, y, z;      // position, copied from pPosq[].x/.y/.z
    float q;            // copied from pPosq[].w
    float sig, eps;     // copied from pAttr[].x and pAttr[].y
    float fx, fy, fz;   // force accumulated for this atom by the tile's threads
};
// Shared-memory atom slots, one per thread: the kernel writes sA[threadIdx.x]
// and reads its neighbours' slots through a pointer offset to the start of
// the thread's GRID-aligned group.
__shared__
Atom
sA
[
GT2XX_NONBOND_THREADS_PER_BLOCK
];
// Block-local copy of this block's slice of cSim.pWorkUnit, filled once at
// kernel start before the __syncthreads() barrier.
__shared__
unsigned
int
sWorkUnit
[
GT2XX_NONBOND_WORKUNITS_PER_SM
];
// Circular successor table: the kernel initialises sNext[i] to
// (i + 1) & (GRID - 1) and uses it to step through partner slots.
__shared__
unsigned
int
sNext
[
GRID
];
// Device constant-memory copy of the host simulation descriptor (gpu->sim);
// written by SetCalculateCDLJForces_12Sim via cudaMemcpyToSymbol and read
// back by GetCalculateCDLJForces_12Sim.
static
__constant__
cudaGmxSimulation
cSim
;
// Uploads the host-side simulation descriptor (gpu->sim) into this
// translation unit's constant-memory copy, cSim.
// Any failure status is handed to RTERROR together with a fixed message.
void SetCalculateCDLJForces_12Sim(gpuContext gpu)
{
    cudaError_t status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
// Downloads this translation unit's constant-memory descriptor (cSim) back
// into the host-side copy, gpu->sim.
// Any failure status is handed to RTERROR. The message now names the Get
// path; it previously said "SetSim", copied from the upload function, which
// pointed whoever read the log at the wrong call.
void GetCalculateCDLJForces_12Sim(gpuContext gpu)
{
    cudaError_t status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyFromSymbol: GetSim copy from cSim failed");
}
__global__
void
kCalculateCDLJForces_12_kernel
()
{
// Read queue of work blocks once so the remainder of
// kernel can run asynchronously
int
pos
=
cSim
.
nbWorkUnitsPerBlock
*
blockIdx
.
x
+
min
(
blockIdx
.
x
,
cSim
.
nbWorkUnitsPerBlockRemainder
);
int
end
=
cSim
.
nbWorkUnitsPerBlock
*
(
blockIdx
.
x
+
1
)
+
min
((
blockIdx
.
x
+
1
),
cSim
.
nbWorkUnitsPerBlockRemainder
);
if
(
threadIdx
.
x
<
end
-
pos
)
{
sWorkUnit
[
threadIdx
.
x
]
=
cSim
.
pWorkUnit
[
pos
+
threadIdx
.
x
];
}
if
(
threadIdx
.
x
<
GRID
)
{
sNext
[
threadIdx
.
x
]
=
(
threadIdx
.
x
+
1
)
&
(
GRID
-
1
);
}
__syncthreads
();
// Now change pos and end to reflect work queue just read
// into shared memory
end
=
end
-
pos
;
pos
=
end
-
(
threadIdx
.
x
>>
GRIDBITS
)
-
1
;
while
(
pos
>=
0
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
sWorkUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
bool
bExclusionFlag
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float4
apos
;
// Local atom x, y, z, q
float3
af
;
// Local atom fx, fy, fz
float
dx
;
float
dy
;
float
dz
;
float
r2
;
float
invR
;
float
sig
;
float
sig2
;
float
sig6
;
float
eps
;
float
dEdR
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
if
(
!
bExclusionFlag
)
{
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
q
=
apos
.
w
;
sA
[
threadIdx
.
x
].
sig
=
a
.
x
;
sA
[
threadIdx
.
x
].
eps
=
a
.
y
;
af
.
x
=
0.0
f
;
af
.
y
=
0.0
f
;
af
.
z
=
0.0
f
;
apos
.
w
*=
cSim
.
epsfac
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1.0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
j
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
dEdR
=
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
dEdR
*=
invR
*
invR
;
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
}
// Write results
float4
of
;
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0.0
f
;
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
int
j
=
y
+
tgx
;
unsigned
int
i
=
x
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pAttr
[
j
];
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
q
=
temp
.
w
;
sA
[
threadIdx
.
x
].
sig
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
eps
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0.0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0.0
f
;
sA
[
threadIdx
.
x
].
fz
=
af
.
z
=
0.0
f
;
apos
.
w
*=
cSim
.
epsfac
;
for
(
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1.0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
tj
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
dEdR
=
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
dEdR
*=
invR
*
invR
;
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
tj
=
sNext
[
tj
];
}
// Write results
float4
of
;
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0.0
f
;
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
of
.
x
=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
sA
[
threadIdx
.
x
].
fz
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
}
}
else
// bExclusion
{
// Read exclusion data
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
q
=
apos
.
w
;
sA
[
threadIdx
.
x
].
sig
=
a
.
x
;
sA
[
threadIdx
.
x
].
eps
=
a
.
y
;
af
.
x
=
0.0
f
;
af
.
y
=
0.0
f
;
af
.
z
=
0.0
f
;
apos
.
w
*=
cSim
.
epsfac
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1.0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
j
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
dEdR
=
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
dEdR
*=
invR
*
invR
;
if
(
!
(
excl
&
0x1
))
{
dEdR
=
0.0
f
;
}
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
excl
>>=
1
;
}
// Write results
float4
of
;
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0.0
f
;
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
int
j
=
y
+
tgx
;
unsigned
int
i
=
x
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float2
temp1
=
cSim
.
pAttr
[
j
];
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
q
=
temp
.
w
;
sA
[
threadIdx
.
x
].
sig
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
eps
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0.0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0.0
f
;
sA
[
threadIdx
.
x
].
fz
=
af
.
z
=
0.0
f
;
apos
.
w
*=
cSim
.
epsfac
;
for
(
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1.0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
tj
].
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
dEdR
=
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
dEdR
*=
invR
*
invR
;
if
(
!
(
excl
&
0x1
))
{
dEdR
=
0.0
f
;
}
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
excl
>>=
1
;
tj
=
sNext
[
tj
];
}
// Write results
float4
of
;
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0.0
f
;
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
of
.
x
=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
sA
[
threadIdx
.
x
].
fz
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
}
}
pos
-=
cSim
.
nonbond_workBlock
;
}
}
// Host entry point: launches kCalculateCDLJForces_12_kernel with the block
// count and block size stored in gpu->sim, then checks the launch through
// LAUNCHERROR.
void kCalculateCDLJForces_12(gpuContext gpu)
{
    kCalculateCDLJForces_12_kernel<<<
        gpu->sim.nonbond_blocks,
        gpu->sim.nonbond_threads_per_block
    >>>();
    LAUNCHERROR("kCalculateCDLJForces_12");
}
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
View file @
8055a541
...
...
@@ -134,7 +134,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
}
else
// bExclusion
{
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
unsigned
int
xi
=
x
>>
GRIDBITS
;
int
cell
=
xi
+
xi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
xi
*
(
xi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
...
...
@@ -299,8 +301,11 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
}
else
// bExclusion
{
unsigned
int
excl
=
cSim
.
pExclusion
[
x
*
cSim
.
exclusionStride
+
y
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
yi
=
y
>>
GRIDBITS
;
int
cell
=
xi
+
yi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
yi
*
(
yi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1_12.cu
deleted
100755 → 0
View file @
b98859ec
This diff is collapsed.
Click to expand it.
platforms/cuda/src/kernels/kCalculateObcGbsaForces2_12.cu
deleted
100755 → 0
View file @
b98859ec
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
#include "gputypes.h"
// Shared-memory record for one atom as used by the OBC/GBSA force-2 kernel.
// Layout is ten consecutive floats.
struct Atom {
    float x, y, z;      // position, copied from pPosq[].x/.y/.z
    float r;            // copied from pObcData[].x
    float sr;           // copied from pObcData[].y
    float sr2;          // sr * sr, precomputed when the slot is filled
    float fx, fy, fz;   // force accumulated for this atom by the tile's threads
    float fb;           // copied from pBornForce[]
    // (a per-atom "sum" field is intentionally not present)
};
// Shared-memory atom slots, one per thread: the kernel writes sA[threadIdx.x]
// and reads its neighbours' slots through a pointer offset to the start of
// the thread's GRID-aligned group.
__shared__
Atom
sA
[
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
];
// Block-local copy of this block's slice of cSim.pWorkUnit, filled once at
// kernel start before the __syncthreads() barrier.
__shared__
unsigned
int
sWorkUnit
[
GT2XX_NONBOND_WORKUNITS_PER_SM
];
// Circular successor table: the kernel initialises sNext[i] to
// (i + 1) & (GRID - 1) and uses it to step through partner slots.
__shared__
unsigned
int
sNext
[
GRID
];
// Device constant-memory copy of the host simulation descriptor (gpu->sim);
// written by SetCalculateObcGbsaForces2_12Sim via cudaMemcpyToSymbol and read
// back by GetCalculateObcGbsaForces2_12Sim.
static
__constant__
cudaGmxSimulation
cSim
;
// Uploads the host-side simulation descriptor (gpu->sim) into this
// translation unit's constant-memory copy, cSim.
// Any failure status is handed to RTERROR together with a fixed message.
void SetCalculateObcGbsaForces2_12Sim(gpuContext gpu)
{
    cudaError_t status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
// Downloads this translation unit's constant-memory descriptor (cSim) back
// into the host-side copy, gpu->sim.
// Any failure status is handed to RTERROR. The message now names the Get
// path; it previously said "SetSim", copied from the upload function, which
// pointed whoever read the log at the wrong call.
void GetCalculateObcGbsaForces2_12Sim(gpuContext gpu)
{
    cudaError_t status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyFromSymbol: GetSim copy from cSim failed");
}
// Second Born-force pass of the OBC/GBSA model (as named by the kernel).
//
// Each block copies its slice of cSim.pWorkUnit into shared memory once. Each
// GRID-wide group of threads then processes work units from that slice,
// stepping by cSim.bornForce2_workBlock. A work unit word is decoded as:
//   bits 2..16 - y cell index
//   bits 17+   - x cell index
// Cell indices are shifted by GRIDBITS to obtain the first atom of the cell.
// Results are written to cSim.pForce4b. Only forces are produced here; no
// Born-sum output is written.
//
// There is no barrier inside the work loop: threads read and update
// shared-memory slots owned by other threads of the same GRID-wide group and
// rely on that group executing in lockstep. NOTE(review): presumably GRID
// equals the warp size; confirm before porting to hardware with independent
// thread scheduling.
__global__ void kCalculateObcGbsaForces2_12_kernel()
{
    // Slice of the global work-unit queue owned by this block; the first
    // bf2WorkUnitsPerBlockRemainder blocks each take one extra unit.
    int queueBegin = cSim.bf2WorkUnitsPerBlock * blockIdx.x + min(blockIdx.x, cSim.bf2WorkUnitsPerBlockRemainder);
    int queueEnd   = cSim.bf2WorkUnitsPerBlock * (blockIdx.x + 1) + min((blockIdx.x + 1), cSim.bf2WorkUnitsPerBlockRemainder);
    int unitCount  = queueEnd - queueBegin;

    if (threadIdx.x < unitCount)
    {
        sWorkUnit[threadIdx.x] = cSim.pWorkUnit[queueBegin + threadIdx.x];
    }
    if (threadIdx.x < GRID)
    {
        sNext[threadIdx.x] = (threadIdx.x + 1) & (GRID - 1);
    }
    __syncthreads();

    // Index into the shared copy; groups walk it from the back.
    int unit = unitCount - (threadIdx.x >> GRIDBITS) - 1;

    while (unit >= 0)
    {
        // Decode the work unit.
        const unsigned int packed = sWorkUnit[unit];
        const unsigned int y      = ((packed >> 2) & 0x7fff) << GRIDBITS;
        const unsigned int x      = (packed >> 17) << GRIDBITS;

        const unsigned int tgx = threadIdx.x & (GRID - 1);   // lane within the group
        const unsigned int tbx = threadIdx.x - tgx;          // first thread of the group
        Atom* psA              = &sA[tbx];
        int tj                 = tgx;                        // rotating partner index

        // This thread's own atom, kept in registers.
        const unsigned int i = x + tgx;
        float4 apos          = cSim.pPosq[i];
        float2 a             = cSim.pObcData[i];    // a.x -> Atom::r, a.y -> Atom::sr
        float fb             = cSim.pBornForce[i];

        float3 af;

        if (x == y)
        {
            // Diagonal cell: partners are the group's own atoms.
            sA[threadIdx.x].fx  = af.x = 0.0f;
            sA[threadIdx.x].fy  = af.y = 0.0f;
            sA[threadIdx.x].fz  = af.z = 0.0f;
            sA[threadIdx.x].x   = apos.x;
            sA[threadIdx.x].y   = apos.y;
            sA[threadIdx.x].z   = apos.z;
            sA[threadIdx.x].r   = a.x;
            sA[threadIdx.x].sr  = a.y;
            sA[threadIdx.x].sr2 = a.y * a.y;
            sA[threadIdx.x].fb  = fb;

            // Visit every other slot exactly once; the thread's own slot is skipped.
            for (unsigned int j = sNext[tgx]; j != tgx; j = sNext[j])
            {
                float dx = psA[j].x - apos.x;
                float dy = psA[j].y - apos.y;
                float dz = psA[j].z - apos.z;
                float r2 = dx * dx + dy * dy + dz * dz;
                float r  = sqrt(r2);

                float rPlusSrJ  = r + psA[j].sr;
                float lower     = 1.0f / max(a.x, fabs(r - psA[j].sr));
                float upper     = 1.0f / rPlusSrJ;
                float rInverse  = 1.0f / r;
                float lower2    = lower * lower;
                float upper2    = upper * upper;
                float r2Inverse = rInverse * rInverse;
                float logTerm   = log(upper / lower);
                float diff2     = (lower2 - upper2);
                float diff2OverR = diff2 * rInverse;
                logTerm        *= rInverse;

                float term = 0.125f * (1.000f + psA[j].sr2 * r2Inverse) * diff2OverR +
                             0.250f * logTerm * r2Inverse;
                float dE = fb * term;
                if (a.x >= rPlusSrJ)
                {
                    // No contribution when this atom's radius reaches r + sr of the partner.
                    dE = 0.0f;
                }

                // Equal and opposite update: own accumulator and partner's slot.
                float d = dx * dE;
                af.x      -= d;
                psA[j].fx += d;
                d          = dy * dE;
                af.y      -= d;
                psA[j].fy += d;
                d          = dz * dE;
                af.z      -= d;
                psA[j].fz += d;
            }

            // Combine the register total with what neighbours added to this thread's slot.
            int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
            float4 of;
            of.x = af.x + sA[threadIdx.x].fx;
            of.y = af.y + sA[threadIdx.x].fy;
            of.z = af.z + sA[threadIdx.x].fz;
            of.w = 0.0f;
            cSim.pForce4b[offset] = of;
        }
        else
        {
            // Off-diagonal cell: shared memory holds the y-cell atoms.
            int j              = y + tgx;
            float4 partnerPosq = cSim.pPosq[j];
            float2 partnerObc  = cSim.pObcData[j];
            sA[threadIdx.x].fb = cSim.pBornForce[j];

            sA[threadIdx.x].fx = af.x = 0.0f;
            sA[threadIdx.x].fy = af.y = 0.0f;
            sA[threadIdx.x].fz = af.z = 0.0f;

            float sr2 = a.y * a.y;   // squared a.y of this thread's own atom

            sA[threadIdx.x].x   = partnerPosq.x;
            sA[threadIdx.x].y   = partnerPosq.y;
            sA[threadIdx.x].z   = partnerPosq.z;
            sA[threadIdx.x].r   = partnerObc.x;
            sA[threadIdx.x].sr  = partnerObc.y;
            sA[threadIdx.x].sr2 = partnerObc.y * partnerObc.y;

            // Each thread starts at its own lane and follows sNext, so at every
            // step the group's threads touch distinct slots.
            for (j = 0; j < GRID; j++)
            {
                float dx = psA[tj].x - apos.x;
                float dy = psA[tj].y - apos.y;
                float dz = psA[tj].z - apos.z;
                float r2 = dx * dx + dy * dy + dz * dz;
                float r  = sqrt(r2);

                // Quantities for both directions are computed side by side:
                // suffix J uses the partner's sr, suffix I uses this atom's a.y.
                float r2Inverse = 1.0f / r2;
                float rPlusSrJ  = r + psA[tj].sr;
                float rPlusSrI  = r + a.y;
                float rInverse  = 1.0f / r;
                float lowerJ    = 1.0f / max(a.x, fabs(r - psA[tj].sr));
                float lowerI    = 1.0f / max(psA[tj].r, fabs(r - a.y));
                float upperJ    = 1.0f / rPlusSrJ;
                float upperI    = 1.0f / rPlusSrI;
                float lower2J   = lowerJ * lowerJ;
                float lower2I   = lowerI * lowerI;
                float upper2J   = upperJ * upperJ;
                float upper2I   = upperI * upperI;
                float logTermJ  = log(upperJ / lowerJ);
                float logTermI  = log(upperI / lowerI);
                float diff2J    = (lower2J - upper2J);
                float diff2I    = (lower2I - upper2I);
                float diff2OverRJ = diff2J * rInverse;
                float diff2OverRI = diff2I * rInverse;
                logTermJ       *= rInverse;
                logTermI       *= rInverse;

                // First direction: weighted by this thread's own fb.
                float term = 0.125f * (1.000f + psA[tj].sr2 * r2Inverse) * diff2OverRJ +
                             0.250f * logTermJ * r2Inverse;
                float dE = fb * term;
                if (a.x >= rPlusSrJ)
                {
                    dE = 0.0f;
                }
                float d = dx * dE;
                af.x       -= d;
                psA[tj].fx += d;
                d           = dy * dE;
                af.y       -= d;
                psA[tj].fy += d;
                d           = dz * dE;
                af.z       -= d;
                psA[tj].fz += d;

                // Second direction: weighted by the partner's fb.
                term = 0.125f * (1.000f + sr2 * r2Inverse) * diff2OverRI +
                       0.250f * logTermI * r2Inverse;
                dE = psA[tj].fb * term;
                if (psA[tj].r >= rPlusSrI)
                {
                    dE = 0.0f;
                }
                dx *= dE;
                dy *= dE;
                dz *= dE;
                psA[tj].fx += dx;
                psA[tj].fy += dy;
                psA[tj].fz += dz;
                af.x       -= dx;
                af.y       -= dy;
                af.z       -= dz;

                tj = sNext[tj];
            }

            // Force on the x-cell atom goes to the output buffer selected by
            // the y cell, and the force accumulated for the y-cell atom to the
            // buffer selected by the x cell.
            int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
            float4 of;
            of.x = af.x;
            of.y = af.y;
            of.z = af.z;
            of.w = 0.0f;
            cSim.pForce4b[offset] = of;

            offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
            of.x   = sA[threadIdx.x].fx;
            of.y   = sA[threadIdx.x].fy;
            of.z   = sA[threadIdx.x].fz;
            cSim.pForce4b[offset] = of;
        }

        unit -= cSim.bornForce2_workBlock;
    }
}
// Host entry point: launches kCalculateObcGbsaForces2_12_kernel with the
// block count and block size stored in gpu->sim, then checks the launch
// through LAUNCHERROR.
void kCalculateObcGbsaForces2_12(gpuContext gpu)
{
    kCalculateObcGbsaForces2_12_kernel<<<
        gpu->sim.bornForce2_blocks,
        gpu->sim.bornForce2_threads_per_block
    >>>();
    LAUNCHERROR("kCalculateObcGbsaForces2_12");
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment