Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
af4d503a
Commit
af4d503a
authored
Apr 13, 2011
by
Mark Friedrichs
Browse files
Minor optimizations
parent
d4559528
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
77 additions
and
78 deletions
+77
-78
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+22
-19
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+26
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+29
-58
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
af4d503a
//-----------------------------------------------------------------------------------------
//
/
-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
...
...
@@ -115,7 +115,7 @@ __device__ static void debugSetup( unsigned int atomI, unsigned int atomJ,
__device__
static
void
calculatePmeSelfEnergyElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
float
*
energy
)
{
float
term
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
fterm
=
-
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
)
*
cSim
.
alphaEwald
/
cAmoebaSim
.
sqrtPi
;
float
fterm
=
-
cSim
.
alphaEwald
/
cAmoebaSim
.
sqrtPi
;
float
cii
=
atomI
.
q
*
atomI
.
q
;
...
...
@@ -141,7 +141,7 @@ __device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDir
__device__
static
void
calculatePmeSelfTorqueElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
)
{
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
float
uix
=
0.5
f
*
(
atomI
.
inducedDipole
[
0
]
+
atomI
.
inducedDipoleP
[
0
]);
float
uiy
=
0.5
f
*
(
atomI
.
inducedDipole
[
1
]
+
atomI
.
inducedDipoleP
[
1
]);
...
...
@@ -175,8 +175,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
float
r
=
sqrt
(
r2
);
float
ck
=
atomJ
.
q
;
float
conversionFactor
=
(
-
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
// set the permanent multipole and induced dipole values;
float
ci
=
atomI
.
q
;
...
...
@@ -209,7 +207,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
float
qk8
=
atomJ
.
labFrameQuadrupole
[
7
];
float
qk9
=
atomJ
.
labFrameQuadrupole
[
8
];
// calculate the real space error function terms
;
// calculate the real space error function terms
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
...
...
@@ -236,7 +234,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
alsq2n
*=
alsq2
;
float
bn5
=
(
9.0
f
*
bn4
+
alsq2n
*
exp2a
)
/
r2
;
// apply Thole polarization damping to scale factors
;
// apply Thole polarization damping to scale factors
float
rr1
=
1.0
f
/
r
;
float
rr3
=
rr1
/
r2
;
...
...
@@ -511,7 +509,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
e
=
e
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
erl
;
ei
=
ei
-
erli
;
forceTorqueEnergy
[
0
].
w
=
-
conversionFactor
*
(
e
+
ei
);
forceTorqueEnergy
[
0
].
w
=
(
e
+
ei
);
// increment the total intramolecular energy; assumes;
// intramolecular distances are less than half of cell;
...
...
@@ -843,18 +841,17 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// increment gradient due to force and torque on first site;
forceTorqueEnergy
[
0
].
x
=
conversionFactor
*
(
ftm21
+
ftm2i1
);
forceTorqueEnergy
[
0
].
y
=
conversionFactor
*
(
ftm22
+
ftm2i2
);
forceTorqueEnergy
[
0
].
z
=
conversionFactor
*
(
ftm23
+
ftm2i3
);
forceTorqueEnergy
[
0
].
x
=
(
ftm21
+
ftm2i1
);
forceTorqueEnergy
[
0
].
y
=
(
ftm22
+
ftm2i2
);
forceTorqueEnergy
[
0
].
z
=
(
ftm23
+
ftm2i3
);
conversionFactor
*=
-
1.0
;
forceTorqueEnergy
[
1
].
x
=
conversionFactor
*
(
ttm21
+
ttm2i1
);
forceTorqueEnergy
[
1
].
y
=
conversionFactor
*
(
ttm22
+
ttm2i2
);
forceTorqueEnergy
[
1
].
z
=
conversionFactor
*
(
ttm23
+
ttm2i3
);
forceTorqueEnergy
[
1
].
x
=
(
ttm21
+
ttm2i1
);
forceTorqueEnergy
[
1
].
y
=
(
ttm22
+
ttm2i2
);
forceTorqueEnergy
[
1
].
z
=
(
ttm23
+
ttm2i3
);
forceTorqueEnergy
[
2
].
x
=
conversionFactor
*
(
ttm31
+
ttm3i1
);
forceTorqueEnergy
[
2
].
y
=
conversionFactor
*
(
ttm32
+
ttm3i2
);
forceTorqueEnergy
[
2
].
z
=
conversionFactor
*
(
ttm33
+
ttm3i3
);
forceTorqueEnergy
[
2
].
x
=
(
ttm31
+
ttm3i1
);
forceTorqueEnergy
[
2
].
y
=
(
ttm32
+
ttm3i2
);
forceTorqueEnergy
[
2
].
z
=
(
ttm33
+
ttm3i3
);
#ifdef AMOEBA_DEBUG
int
debugIndex
=
0
;
...
...
@@ -1138,7 +1135,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u gpu->nonbond_threads_per_block=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticCutoffForces: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%
l
u shrd=%
l
u ixnCt=%
l
u workUnits=%u gpu->nonbond_threads_per_block=%u
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
PmeDirectElectrostaticParticle
),
(
sizeof
(
PmeDirectElectrostaticParticle
))
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
nonbond_threads_per_block
);
...
...
@@ -1159,6 +1156,12 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
else
{
/*
if (gpu->sm_version >= SM_20)
cudaFuncSetCacheConfig(kCalculateAmoebaPmeDirectElectrostaticCutoffForces_kernel, cudaFuncCachePreferL1 );
//cudaFuncSetCacheConfig(kCalculateAmoebaPmeDirectElectrostaticCutoffForces_kernel, cudaFuncCachePreferShared );
*/
kCalculateAmoebaPmeDirectElectrostaticCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
PmeDirectElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
af4d503a
...
...
@@ -58,6 +58,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
float4
forceTorqueEnergy
[
3
];
float
scalingFactors
[
LastScalingIndex
];
float
conversionFactor
=
(
-
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
while
(
pos
<
end
)
{
...
...
@@ -203,6 +204,14 @@ if( atomI == targetAtom || atomJ == targetAtom ){
totalEnergy
+=
energy
;
}
localParticle
.
force
[
0
]
*=
conversionFactor
;
localParticle
.
force
[
1
]
*=
conversionFactor
;
localParticle
.
force
[
2
]
*=
conversionFactor
;
localParticle
.
torque
[
0
]
*=
-
conversionFactor
;
localParticle
.
torque
[
1
]
*=
-
conversionFactor
;
localParticle
.
torque
[
2
]
*=
-
conversionFactor
;
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
...
...
@@ -386,6 +395,22 @@ if( atomI == targetAtom || atomJ == targetAtom ){
}
// end of j-loop
localParticle
.
force
[
0
]
*=
conversionFactor
;
localParticle
.
force
[
1
]
*=
conversionFactor
;
localParticle
.
force
[
2
]
*=
conversionFactor
;
localParticle
.
torque
[
0
]
*=
-
conversionFactor
;
localParticle
.
torque
[
1
]
*=
-
conversionFactor
;
localParticle
.
torque
[
2
]
*=
-
conversionFactor
;
sA
[
threadIdx
.
x
].
force
[
0
]
*=
conversionFactor
;
sA
[
threadIdx
.
x
].
force
[
1
]
*=
conversionFactor
;
sA
[
threadIdx
.
x
].
force
[
2
]
*=
conversionFactor
;
sA
[
threadIdx
.
x
].
torque
[
0
]
*=
-
conversionFactor
;
sA
[
threadIdx
.
x
].
torque
[
1
]
*=
-
conversionFactor
;
sA
[
threadIdx
.
x
].
torque
[
2
]
*=
-
conversionFactor
;
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
...
...
@@ -415,5 +440,5 @@ if( atomI == targetAtom || atomJ == targetAtom ){
}
pos
++
;
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+
=
totalEnergy
;
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
-
=
conversionFactor
*
totalEnergy
;
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
af4d503a
...
...
@@ -82,12 +82,12 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
float
bn1
=
(
bn0
+
alsq2n
*
exp2a
)
/
r2
;
float
bn1
=
(
bn0
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
float
bn2
=
(
3.0
f
*
bn1
+
alsq2n
*
exp2a
)
/
r2
;
...
...
@@ -116,66 +116,41 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
float
r3
=
(
r
*
r2
);
float
r5
=
(
r3
*
r2
);
float
rr3
=
(
1.0
f
-
dsc3
)
/
r3
;
float
rr5
=
3.0
f
*
(
1.0
f
-
dsc5
)
/
r5
;
float
rr5
=
3.0
f
*
(
1.0
f
-
dsc5
)
/
r5
;
float
duir
=
atomI
.
inducedDipole
[
0
]
*
xr
+
atomI
.
inducedDipole
[
1
]
*
yr
+
atomI
.
inducedDipole
[
2
]
*
zr
;
float
dukr
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
puir
=
atomI
.
inducedDipolePolar
[
0
]
*
xr
+
atomI
.
inducedDipolePolar
[
1
]
*
yr
+
atomI
.
inducedDipolePolar
[
2
]
*
zr
;
float
pukr
=
atomJ
.
inducedDipolePolar
[
0
]
*
xr
+
atomJ
.
inducedDipolePolar
[
1
]
*
yr
+
atomJ
.
inducedDipolePolar
[
2
]
*
zr
;
float
preFactor1
=
rr3
-
bn1
;
float
preFactor2
=
bn2
-
rr5
;
bn1
*=
-
1.0
f
;
float
dukr
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
preFactor3
=
preFactor2
*
dukr
;
f
loat
fimd0
=
bn
1
*
atomJ
.
inducedDipole
[
0
]
+
bn2
*
dukr
*
xr
;
f
loat
fimd1
=
bn
1
*
atomJ
.
inducedDipole
[
1
]
+
bn2
*
dukr
*
yr
;
f
loat
fimd2
=
bn
1
*
atomJ
.
inducedDipole
[
2
]
+
bn2
*
dukr
*
zr
;
f
ields
[
0
].
x
=
preFactor3
*
xr
+
preFactor
1
*
atomJ
.
inducedDipole
[
0
];
f
ields
[
1
].
x
=
preFactor3
*
yr
+
preFactor
1
*
atomJ
.
inducedDipole
[
1
];
f
ields
[
2
].
x
=
preFactor3
*
zr
+
preFactor
1
*
atomJ
.
inducedDipole
[
2
];
float
fkmd0
=
bn1
*
atomI
.
inducedDipole
[
0
]
+
bn2
*
duir
*
xr
;
float
fkmd1
=
bn1
*
atomI
.
inducedDipole
[
1
]
+
bn2
*
duir
*
yr
;
float
fkmd2
=
bn1
*
atomI
.
inducedDipole
[
2
]
+
bn2
*
duir
*
zr
;
float
fimp0
=
bn1
*
atomJ
.
inducedDipolePolar
[
0
]
+
bn2
*
pukr
*
xr
;
float
fimp1
=
bn1
*
atomJ
.
inducedDipolePolar
[
1
]
+
bn2
*
pukr
*
yr
;
float
fimp2
=
bn1
*
atomJ
.
inducedDipolePolar
[
2
]
+
bn2
*
pukr
*
zr
;
float
duir
=
atomI
.
inducedDipole
[
0
]
*
xr
+
atomI
.
inducedDipole
[
1
]
*
yr
+
atomI
.
inducedDipole
[
2
]
*
zr
;
preFactor3
=
preFactor2
*
duir
;
f
loat
fkmp0
=
bn1
*
atomI
.
inducedDipolePolar
[
0
]
+
bn2
*
puir
*
xr
;
f
loat
fkmp1
=
bn1
*
atomI
.
inducedDipolePolar
[
1
]
+
bn2
*
puir
*
yr
;
f
loat
fkmp2
=
bn1
*
atomI
.
inducedDipolePolar
[
2
]
+
bn2
*
puir
*
zr
;
f
ields
[
0
].
y
=
preFactor3
*
xr
+
preFactor1
*
atomI
.
inducedDipole
[
0
]
;
f
ields
[
1
].
y
=
preFactor3
*
yr
+
preFactor1
*
atomI
.
inducedDipole
[
1
]
;
f
ields
[
2
].
y
=
preFactor3
*
zr
+
preFactor1
*
atomI
.
inducedDipole
[
2
]
;
rr3
*=
-
1.0
f
;
float
fid0
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
rr5
*
dukr
*
xr
;
float
fid1
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
rr5
*
dukr
*
yr
;
float
fid2
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
rr5
*
dukr
*
zr
;
float
fkd0
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
rr5
*
duir
*
xr
;
float
fkd1
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
rr5
*
duir
*
yr
;
float
fkd2
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
rr5
*
duir
*
zr
;
float
pukr
=
atomJ
.
inducedDipolePolar
[
0
]
*
xr
+
atomJ
.
inducedDipolePolar
[
1
]
*
yr
+
atomJ
.
inducedDipolePolar
[
2
]
*
zr
;
preFactor3
=
preFactor2
*
pukr
;
f
loat
fip0
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
rr5
*
pukr
*
xr
;
f
loat
fip1
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
rr5
*
pukr
*
yr
;
f
loat
fip2
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
rr5
*
pukr
*
zr
;
f
ields
[
0
].
z
=
preFactor3
*
xr
+
preFactor1
*
atomJ
.
inducedDipolePolar
[
0
];
f
ields
[
1
].
z
=
preFactor3
*
yr
+
preFactor1
*
atomJ
.
inducedDipolePolar
[
1
];
f
ields
[
2
].
z
=
preFactor3
*
zr
+
preFactor1
*
atomJ
.
inducedDipolePolar
[
2
];
float
fkp0
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
rr5
*
puir
*
xr
;
float
fkp1
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
rr5
*
puir
*
yr
;
float
fkp2
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
rr5
*
puir
*
zr
;
// increment the field at each site due to this interaction
float
puir
=
atomI
.
inducedDipolePolar
[
0
]
*
xr
+
atomI
.
inducedDipolePolar
[
1
]
*
yr
+
atomI
.
inducedDipolePolar
[
2
]
*
zr
;
preFactor3
=
preFactor2
*
puir
;
fields
[
0
].
w
=
preFactor3
*
xr
+
preFactor1
*
atomI
.
inducedDipolePolar
[
0
];
fields
[
1
].
w
=
preFactor3
*
yr
+
preFactor1
*
atomI
.
inducedDipolePolar
[
1
];
fields
[
2
].
w
=
preFactor3
*
zr
+
preFactor1
*
atomI
.
inducedDipolePolar
[
2
];
fields
[
0
].
x
=
fimd0
-
fid0
;
fields
[
0
].
y
=
fkmd0
-
fkd0
;
fields
[
0
].
z
=
fimp0
-
fip0
;
fields
[
0
].
w
=
fkmp0
-
fkp0
;
fields
[
1
].
x
=
fimd1
-
fid1
;
fields
[
1
].
y
=
fkmd1
-
fkd1
;
fields
[
1
].
z
=
fimp1
-
fip1
;
fields
[
1
].
w
=
fkmp1
-
fkp1
;
fields
[
2
].
x
=
fimd2
-
fid2
;
fields
[
2
].
y
=
fkmd2
-
fkd2
;
fields
[
2
].
z
=
fimp2
-
fip2
;
fields
[
2
].
w
=
fkmp2
-
fkp2
;
}
else
{
fields
[
0
].
x
=
0.0
f
;
...
...
@@ -240,7 +215,7 @@ static void kInitializeMutualInducedField_kernel(
float
*
polarizability
)
{
int
pos
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
3
*
cSim
.
atoms
)
{
fixedEField
[
pos
]
*=
polarizability
[
pos
];
...
...
@@ -323,7 +298,7 @@ static void kSorUpdateMutualInducedField_kernel(
float
*
matrixProduct
,
float
*
matrixProductP
)
{
int
pos
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
3
*
cSim
.
atoms
)
{
...
...
@@ -415,9 +390,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Cutoff -- use warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u
\n
"
,
methodName
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu ixnCt=%lu workUnits=%u
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
MutualInducedParticle
),
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
...
...
@@ -426,8 +400,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
if
(
gpu
->
bOutputBufferPerWarp
){
//gpu->sim.pInteractingWorkUnit,
//amoebaGpu->psWorkUnit->_pDevData,
kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
...
...
@@ -591,7 +563,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
LAUNCHERROR
(
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop
\n
"
);
// post matrix multiply
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment