Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
50a15fb0
"platforms/opencl/tests/TestOpenCLQTBIntegrator.cpp" did not exist on "a402046652cab8ba297aa423e4cb57c904525144"
Commit
50a15fb0
authored
Oct 13, 2010
by
Mark Friedrichs
Browse files
Modifications to reduce the calculation of interactions within blocks for Vdw
parent
e6c19b54
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
121 additions
and
71 deletions
+121
-71
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+0
-8
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+6
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
.../platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
+114
-63
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdwParticle.h
...tforms/cuda/src/kernels/kCalculateAmoebaCudaVdwParticle.h
+1
-0
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
50a15fb0
...
...
@@ -268,14 +268,6 @@ if( atomI == targetAtom ){
// No interactions in this block.
}
else
{
if
(
lasty
!=
y
)
{
// load shared data
loadPmeDirectElectrostaticShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
)
);
}
sA
[
threadIdx
.
x
].
force
[
0
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
1
]
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
force
[
2
]
=
0
.
0
f
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
50a15fb0
...
...
@@ -250,6 +250,12 @@ void kCalculateAmoebaVdw14_7Reduction_kernel( float* inputForce, float4* outputF
}
}
/**
 * Accumulate the temporary force of one particle into another.
 *
 * Adds each of the three tempForce components of atomJ onto the matching
 * component of atomI. atomJ is only read; atomI.tempForce is updated in place.
 *
 * @param atomI particle whose tempForce receives the sum
 * @param atomJ particle whose tempForce is added to atomI
 */
__device__ void sumTempBuffer( Vdw14_7Particle& atomI, Vdw14_7Particle& atomJ ){

    // components are processed in the order 0, 1, 2
    for( int component = 0; component < 3; component++ ){
        atomI.tempForce[component] += atomJ.tempForce[component];
    }
}
static
void
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpuContext
amoebaGpu
,
CUDAStream
<
float
>*
vdwOutputArray
,
CUDAStream
<
float4
>*
forceOutputArray
)
{
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.h
View file @
50a15fb0
...
...
@@ -212,12 +212,12 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
if
(
flags
==
0
)
{
}
else
{
#endif
// zero shared fields
zeroVdw14_7SharedForce
(
&
(
sA
[
threadIdx
.
x
])
);
if
(
bExclusionFlag
){
if
(
bExclusionFlag
)
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
yi
=
y
>>
GRIDBITS
;
...
...
@@ -231,20 +231,27 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
{
float
ijForce
[
3
];
#ifdef USE_CUTOFF
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
#else
unsigned
int
jIdx
=
tj
;
#endif
// get combined sigma and epsilon
float
combindedSigma
;
float
combindedEpsilon
;
getVdw14_7CombindedSigmaEpsilon_kernel
(
sigmaCombiningRule
,
localParticle
.
sigma
,
psA
[
t
j
].
sigma
,
&
combindedSigma
,
epsilonCombiningRule
,
localParticle
.
epsilon
,
psA
[
t
j
].
epsilon
,
&
combindedEpsilon
);
getVdw14_7CombindedSigmaEpsilon_kernel
(
sigmaCombiningRule
,
localParticle
.
sigma
,
psA
[
j
Idx
].
sigma
,
&
combindedSigma
,
epsilonCombiningRule
,
localParticle
.
epsilon
,
psA
[
j
Idx
].
epsilon
,
&
combindedEpsilon
);
// calculate force
float
energy
;
ijForce
[
0
]
=
psA
[
t
j
].
x
-
localParticle
.
x
;
ijForce
[
1
]
=
psA
[
t
j
].
y
-
localParticle
.
y
;
ijForce
[
2
]
=
psA
[
t
j
].
z
-
localParticle
.
z
;
ijForce
[
0
]
=
psA
[
j
Idx
].
x
-
localParticle
.
x
;
ijForce
[
1
]
=
psA
[
j
Idx
].
y
-
localParticle
.
y
;
ijForce
[
2
]
=
psA
[
j
Idx
].
z
-
localParticle
.
z
;
if
(
cAmoebaSim
.
vdwUsePBC
)
{
ijForce
[
0
]
-=
floor
(
ijForce
[
0
]
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
...
...
@@ -259,32 +266,72 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
// mask out excluded ixns
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
t
j
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
j
Idx
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
if
(
mask
&&
bExclusionFlag
){
unsigned
int
maskIndex
=
1
<<
t
j
;
unsigned
int
maskIndex
=
1
<<
j
Idx
;
mask
=
(
exclusionMask
&
maskIndex
)
?
0
:
1
;
}
if
(
mask
==
0
)
{
energy
=
ijForce
[
0
]
=
ijForce
[
1
]
=
ijForce
[
2
]
=
0
.
0
f
;
}
// accumulate force for atomI
forceSum
[
0
]
+=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
forceSum
[
1
]
+=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
forceSum
[
2
]
+=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
forceSum
[
0
]
+=
ijForce
[
0
];
forceSum
[
1
]
+=
ijForce
[
1
];
forceSum
[
2
]
+=
ijForce
[
2
];
// accumulate force for atomJ
totalEnergy
+=
energy
;
psA
[
tj
].
force
[
0
]
-=
mask
?
ijForce
[
0
]
:
0
.
0
f
;
psA
[
tj
].
force
[
1
]
-=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
psA
[
tj
].
force
[
2
]
-=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
#ifndef USE_CUTOFF
psA
[
jIdx
].
force
[
0
]
-=
ijForce
[
0
];
psA
[
jIdx
].
force
[
1
]
-=
ijForce
[
1
];
psA
[
jIdx
].
force
[
2
]
-=
ijForce
[
2
];
#else
if
(
flags
==
0xFFFFFFFF
){
psA
[
jIdx
].
force
[
0
]
-=
ijForce
[
0
];
psA
[
jIdx
].
force
[
1
]
-=
ijForce
[
1
];
psA
[
jIdx
].
force
[
2
]
-=
ijForce
[
2
];
}
else
{
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
ijForce
[
0
];
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
ijForce
[
1
];
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
ijForce
[
2
];
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
force
[
0
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
0
];
psA
[
jIdx
].
force
[
1
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
1
];
psA
[
jIdx
].
force
[
2
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
2
];
}
}
#endif
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
t
j
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
t
j
)
:
atomI
;
if
(
atomI
==
targetAtom
||
(
y
+
j
Idx
)
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
(
y
+
j
Idx
)
:
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
(
y
+
t
j
);
debugArray
[
index
].
y
=
(
float
)
(
y
+
j
Idx
);
debugArray
[
index
].
z
=
-
3
.
0
;
debugArray
[
index
].
w
=
(
float
)
(
mask
+
1
);
...
...
@@ -311,12 +358,16 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
debugArray
[
index
].
y
=
mask
?
ijForce
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
ijForce
[
2
]
:
0
.
0
f
;
}
#endif
#ifdef USE_CUTOFF
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
// end of j-loop
#ifdef USE_CUTOFF
}
}
#endif
// Write results
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdwParticle.h
View file @
50a15fb0
...
...
@@ -13,6 +13,7 @@ struct Vdw14_7Particle {
float
epsilon
;
float
force
[
3
];
float
tempForce
[
3
];
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment