Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
06089c9a
Commit
06089c9a
authored
Jul 27, 2011
by
Mark Friedrichs
Browse files
No substantive changes -- cleanup
parent
4d6895b0
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
67 additions
and
125 deletions
+67
-125
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+4
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+53
-17
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
+7
-7
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
+1
-96
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
...tforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
+2
-2
No files found.
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
View file @
06089c9a
...
...
@@ -1642,9 +1642,10 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
}
if
(
0
&&
amoebaGpu
->
log
)
fprintf
(
amoebaGpu
->
log
,
"Z1 %4d %d [%4d %4d %4d]
\n
"
,
ii
,
axisType
[
ii
],
multipoleParticleX
[
ii
],
multipoleParticleY
[
ii
],
multipoleParticleZ
[
ii
]
);
if
(
0
&&
amoebaGpu
->
log
){
fprintf
(
amoebaGpu
->
log
,
"Z1 %4d %d [%4d %4d %4d] dmp/thole %15.7e %15.7e
\n
"
,
ii
,
axisType
[
ii
],
multipoleParticleX
[
ii
],
multipoleParticleY
[
ii
],
multipoleParticleZ
[
ii
],
dampingFactors
[
ii
],
tholes
[
ii
]
);
}
// charges
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
06089c9a
...
...
@@ -513,17 +513,9 @@ static __device__ void loadElectrostaticParticle( struct ElectrostaticParticle*
}
static
__device__
void
zeroElectrostaticParticle
(
struct
ElectrostaticParticle
*
sA
){
// coordinates & charge
sA
->
force
[
0
]
=
0.0
f
;
sA
->
force
[
1
]
=
0.0
f
;
sA
->
force
[
2
]
=
0.0
f
;
/*
sA->torque[0] = 0.0f;
sA->torque[1] = 0.0f;
sA->torque[2] = 0.0f;
*/
}
#undef SUB_METHOD_NAME
...
...
@@ -649,7 +641,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
maxSlots
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysData
,
0
,
sizeof
(
float
)
*
4
*
maxSlots
*
paddedNumberOfAtoms
);
debugArray
->
Upload
();
unsigned
int
targetAtom
=
237
;
//unsigned int targetAtom = 1137;
unsigned
int
targetAtom
=
1
;
#endif
// on first pass, set threads/block
...
...
@@ -675,8 +668,14 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
ElectrostaticParticle
),
sizeof
(
ElectrostaticParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
psWorkUnit
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
}
else
{
kCalculateAmoebaCudaElectrostaticN2Forces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
psWorkUnit
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
}
#else
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
psWorkUnit
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
...
...
@@ -684,8 +683,44 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
kCalculateAmoebaCudaElectrostaticN2Forces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
threadsPerBlock
>>>
(
gpu
->
psWorkUnit
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
}
#endif
LAUNCHERROR
(
"kCalculateAmoebaCudaElectrostaticN2Forces"
);
#ifdef AMOEBA_DEBUG
if
(
0
){
debugArray
->
Download
();
std
::
vector
<
double
>
conversions
;
conversions
.
push_back
(
0.1
f
/
4.184
f
);
conversions
.
push_back
(
0.1
f
/
4.184
f
);
unsigned
int
kkBlocks
=
4
;
(
void
)
fprintf
(
stderr
,
"
\n
Target atom output %5u
\n
"
,
targetAtom
);
for
(
unsigned
int
ii
=
0
;
ii
<
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
ii
++
){
double
sum
=
0.0
;
for
(
unsigned
int
kk
=
0
;
kk
<
kkBlocks
&&
sum
==
0.0
;
kk
++
){
unsigned
int
index
=
ii
+
kk
*
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
sum
+=
debugArray
->
_pSysData
[
index
].
x
+
debugArray
->
_pSysData
[
index
].
y
+
debugArray
->
_pSysData
[
index
].
z
+
debugArray
->
_pSysData
[
index
].
w
;
}
if
(
sum
>
0.0
){
(
void
)
fprintf
(
stderr
,
"%5u"
,
ii
);
for
(
unsigned
int
kk
=
0
;
kk
<
kkBlocks
;
kk
++
){
unsigned
int
index
=
ii
+
kk
*
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
(
void
)
fprintf
(
stderr
,
" %15.7e %15.7e %15.7e %5.1f"
,
conversions
[
kk
]
*
debugArray
->
_pSysData
[
index
].
x
,
conversions
[
kk
]
*
debugArray
->
_pSysData
[
index
].
y
,
conversions
[
kk
]
*
debugArray
->
_pSysData
[
index
].
z
,
debugArray
->
_pSysData
[
index
].
w
);
if
(
((
kk
+
1
)
%
2
)
==
0
&&
(
kk
!=
(
kkBlocks
-
1
)
)
){
(
void
)
fprintf
(
stderr
,
"
\n
%5u"
,
ii
);
}
}
(
void
)
fprintf
(
stderr
,
"
\n
"
);
if
(
kkBlocks
>
2
){
(
void
)
fprintf
(
stderr
,
"
\n
"
);
}
}
}
}
#endif
#ifdef AMOEBA_DEBUG
if
(
0
){
VectorOfDoubleVectors
outputVector
;
...
...
@@ -694,7 +729,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
fileId
.
push_back
(
call
++
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float
>*
temp
=
new
CUDAStream
<
float
>
(
3
*
paddedNumberOfAtoms
,
1
,
"Temp
1
"
);
CUDAStream
<
float
>*
temp
=
new
CUDAStream
<
float
>
(
3
*
paddedNumberOfAtoms
,
1
,
"
Electrostatic
Temp"
);
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
reduceAndCopyCUDAStreamFloat4
(
gpu
->
psForce4
,
temp
,
1.0
);
...
...
@@ -706,6 +741,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaElectrostaticTorque"
,
fileId
,
outputVector
);
delete
temp
;
}
#endif
if
(
addTorqueToForce
){
kReduceTorque
(
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
View file @
06089c9a
...
...
@@ -42,10 +42,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
#endif
){
#ifdef AMOEBA_DEBUG
float4
pullBack
[
20
];
#endif
extern
__shared__
ElectrostaticParticle
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
...
...
@@ -210,6 +206,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
psA
[
tj
].
force
[
0
]
-=
force
[
0
];
psA
[
tj
].
force
[
1
]
-=
force
[
1
];
psA
[
tj
].
force
[
2
]
-=
force
[
2
];
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
@@ -243,6 +240,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
#endif
zeroElectrostaticParticle
(
&
(
sA
[
threadIdx
.
x
])
);
zeroElectrostaticParticle
(
&
localParticle
);
tj
=
tgx
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
){
unsigned
int
atomJ
=
y
+
tj
;
...
...
@@ -264,6 +262,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
psA
[
tj
].
force
[
0
]
+=
force
[
0
];
psA
[
tj
].
force
[
1
]
+=
force
[
1
];
psA
[
tj
].
force
[
2
]
+=
force
[
2
];
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
@@ -298,5 +297,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
pos
++
;
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
(
conversionFactor
*
totalEnergy
);
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
View file @
06089c9a
...
...
@@ -110,7 +110,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
%s
numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%lu ixnCt=%lu workUnits=%lu
\n
"
,
methodName
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
cudaComputeAmoebaFixedEField
numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%lu ixnCt=%lu workUnits=%lu
\n
"
,
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
gpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
...
...
@@ -121,23 +121,13 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaFixedE_FieldN2ByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
psWorkUnit
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
else
{
kCalculateAmoebaFixedE_FieldN2Forces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
threadsPerBlock
>>>
(
gpu
->
psWorkUnit
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
LAUNCHERROR
(
"kCalculateAmoebaFixedE_FieldN2Forces_kernel"
);
...
...
@@ -198,91 +188,6 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"EFields End
\n
"
);
/*
(void) fprintf( amoebaGpu->log, "DebugQ\n" );
debugArray->Download();
if( 0 ){
int ii = targetAtom;
float sum[2][3] = { { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f, 0.0f } };
(void) fprintf( amoebaGpu->log,"\n" );
for( int jj = 0; jj < 1248; jj++ ){
int debugIndex = jj;
if( jj == ii )continue;
(void) fprintf( amoebaGpu->log,"\n\n%4d %4d rrs\n[%16.9e %16.9e %16.9e %16.9e]\n",
ii, jj,
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"Y1 %5d %16.9e %16.9e %16.9e\n", jj,
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
sum[0][0] += debugArray->_pSysData[debugIndex].x;
sum[0][1] += debugArray->_pSysData[debugIndex].y;
sum[0][2] += debugArray->_pSysData[debugIndex].z;
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"Y2 %5d %16.9e %16.9e %16.9e\n", jj,
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
sum[1][0] += debugArray->_pSysData[debugIndex].x;
sum[1][1] += debugArray->_pSysData[debugIndex].y;
sum[1][2] += debugArray->_pSysData[debugIndex].z;
}
(void) fprintf( amoebaGpu->log,"SumQ [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]\n",
sum[0][0], sum[0][1], sum[0][2],
sum[1][0], sum[1][1], sum[1][2] );
}
for( unsigned int ii = 0; ii < debugArray->_stride; ii++ ){
int print;
if( debugArray->_pSysData[ii].x != 0.0f || debugArray->_pSysData[ii].y != 0.0f ||
debugArray->_pSysData[ii].y != 0.0f || debugArray->_pSysData[ii].w != 0.0f ||
debugArray->_pSysData[ii].x != debugArray->_pSysData[ii].x ||
debugArray->_pSysData[ii].y != debugArray->_pSysData[ii].y ||
debugArray->_pSysData[ii].z != debugArray->_pSysData[ii].z ||
debugArray->_pSysData[ii].w != debugArray->_pSysData[ii].w ){
print = 0;
} else {
print = 0;
}
if( print ){
unsigned int atomI = ii/amoebaGpu->paddedNumberOfAtoms;
unsigned int atomJ = ii - atomI*amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log, "%5u [%5u %5u] ", ii, atomI, atomJ);
(void) fprintf( amoebaGpu->log, "%14.6e %14.6e %14.6e %14.6e\n",
debugArray->_pSysData[ii].x,
debugArray->_pSysData[ii].y,
debugArray->_pSysData[ii].z,
debugArray->_pSysData[ii].w );
}
}
*/
// write results to file
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
View file @
06089c9a
...
...
@@ -447,7 +447,7 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt
fileId
.
push_back
(
call
++
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float
>*
temp
=
new
CUDAStream
<
float
>
(
3
*
paddedNumberOfAtoms
,
1
,
"Temp
1
"
);
CUDAStream
<
float
>*
temp
=
new
CUDAStream
<
float
>
(
3
*
paddedNumberOfAtoms
,
1
,
"Temp
_MapTorqueAndAddToForce
"
);
reduceAndCopyCUDAStreamFloat4
(
gpu
->
psForce4
,
temp
,
1.0
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
temp
,
outputVector
,
NULL
,
1.0
f
/
4.184
f
);
...
...
@@ -479,7 +479,7 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt
fileId
.
push_back
(
call
++
);
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float
>*
temp
=
new
CUDAStream
<
float
>
(
3
*
paddedNumberOfAtoms
,
1
,
"Temp
1
"
);
CUDAStream
<
float
>*
temp
=
new
CUDAStream
<
float
>
(
3
*
paddedNumberOfAtoms
,
1
,
"Temp
MapTorqueAndAddToForce2
"
);
reduceAndCopyCUDAStreamFloat4
(
gpu
->
psForce4
,
temp
,
1.0
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
temp
,
outputVector
,
NULL
,
1.0
f
/
4.184
f
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment