Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
6bad9d44
Commit
6bad9d44
authored
Apr 08, 2011
by
Mark Friedrichs
Browse files
Removal of several arrays no longer needed
parent
1beac75d
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
273 additions
and
539 deletions
+273
-539
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
+153
-140
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
+3
-14
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+51
-105
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+31
-38
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+35
-44
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaReduce.cu
.../platforms/cuda/src/kernels/kCalculateAmoebaCudaReduce.cu
+0
-86
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaReduce.h
...a/platforms/cuda/src/kernels/kCalculateAmoebaCudaReduce.h
+0
-112
No files found.
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaGpu.cpp
View file @
6bad9d44
...
@@ -97,87 +97,94 @@ void gpuPrintCudaStream( std::string name,
...
@@ -97,87 +97,94 @@ void gpuPrintCudaStream( std::string name,
}
}
extern
"C"
extern
"C"
void
gpuPrintCudaStreamFloat
(
CUDAStream
<
float
>*
cUDAStream
,
FILE
*
log
)
int
gpuPrintCudaStreamFloat
(
CUDAStream
<
float
>*
cUDAStream
,
FILE
*
log
)
{
{
if
(
cUDAStream
==
NULL
)
return
;
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float
),
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float
);
}
}
extern
"C"
extern
"C"
void
gpuPrintCudaStreamFloat2
(
CUDAStream
<
float2
>*
cUDAStream
,
FILE
*
log
)
int
gpuPrintCudaStreamFloat2
(
CUDAStream
<
float2
>*
cUDAStream
,
FILE
*
log
)
{
{
if
(
cUDAStream
==
NULL
)
return
;
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float2
),
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float2
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
2
*
sizeof
(
float
);
}
}
extern
"C"
extern
"C"
void
gpuPrintCudaStreamFloat4
(
CUDAStream
<
float4
>*
cUDAStream
,
FILE
*
log
)
int
gpuPrintCudaStreamFloat4
(
CUDAStream
<
float4
>*
cUDAStream
,
FILE
*
log
)
{
{
if
(
cUDAStream
==
NULL
)
return
;
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float4
),
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
float4
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
4
*
sizeof
(
float
);
}
}
extern
"C"
extern
"C"
void
gpuPrintCudaStreamUnsignedInt
(
CUDAStream
<
unsigned
int
>*
cUDAStream
,
FILE
*
log
)
int
gpuPrintCudaStreamUnsignedInt
(
CUDAStream
<
unsigned
int
>*
cUDAStream
,
FILE
*
log
)
{
{
if
(
cUDAStream
==
NULL
)
return
;
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
unsigned
int
),
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
unsigned
int
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
unsigned
int
);
}
}
extern
"C"
extern
"C"
void
gpuPrintCudaStreamInt
(
CUDAStream
<
int
>*
cUDAStream
,
FILE
*
log
)
int
gpuPrintCudaStreamInt
(
CUDAStream
<
int
>*
cUDAStream
,
FILE
*
log
)
{
{
if
(
cUDAStream
==
NULL
)
return
;
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int
),
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int
);
}
}
extern
"C"
extern
"C"
void
gpuPrintCudaStreamInt2
(
CUDAStream
<
int2
>*
cUDAStream
,
FILE
*
log
)
int
gpuPrintCudaStreamInt2
(
CUDAStream
<
int2
>*
cUDAStream
,
FILE
*
log
)
{
{
if
(
cUDAStream
==
NULL
)
return
;
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int2
),
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int2
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
2
*
sizeof
(
int
);
}
}
extern
"C"
extern
"C"
void
gpuPrintCudaStreamInt4
(
CUDAStream
<
int4
>*
cUDAStream
,
FILE
*
log
)
int
gpuPrintCudaStreamInt4
(
CUDAStream
<
int4
>*
cUDAStream
,
FILE
*
log
)
{
{
if
(
cUDAStream
==
NULL
)
return
;
if
(
cUDAStream
==
NULL
)
return
0
;
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
gpuPrintCudaStream
(
cUDAStream
->
_name
.
c_str
(),
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
,
cUDAStream
->
_subStreams
,
cUDAStream
->
_stride
,
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int4
),
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
sizeof
(
int4
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevStream
),
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
static_cast
<
void
*>
(
cUDAStream
->
_pSysData
),
static_cast
<
void
*>
(
cUDAStream
->
_pDevData
),
log
);
return
cUDAStream
->
_length
*
cUDAStream
->
_subStreams
*
4
*
sizeof
(
int
);
}
}
extern
"C"
extern
"C"
...
@@ -186,6 +193,8 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -186,6 +193,8 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
if
(
log
==
NULL
)
return
;
if
(
log
==
NULL
)
return
;
_gpuContext
*
gpu
=
amoebaGpu
->
gpuContext
;
_gpuContext
*
gpu
=
amoebaGpu
->
gpuContext
;
int
totalMemory
=
0
;
(
void
)
fprintf
(
log
,
"cudaAmoebaGmxSimulation:
\n\n
"
);
(
void
)
fprintf
(
log
,
"cudaAmoebaGmxSimulation:
\n\n
"
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
"
\n
"
);
...
@@ -206,32 +215,32 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -206,32 +215,32 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" outputBuffers %u
\n
"
,
gpu
->
sim
.
outputBuffers
);
(
void
)
fprintf
(
log
,
" outputBuffers %u
\n
"
,
gpu
->
sim
.
outputBuffers
);
(
void
)
fprintf
(
log
,
" workUnits %u
\n
"
,
amoebaGpu
->
workUnits
);
(
void
)
fprintf
(
log
,
" workUnits %u
\n
"
,
amoebaGpu
->
workUnits
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
gpuContext
->
psEnergy
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
gpuContext
->
psEnergy
,
log
);
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
gpuContext
->
psForce4
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
gpuContext
->
psForce4
,
log
);
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
gpuContext
->
psPosq4
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
gpuContext
->
psPosq4
,
log
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
gpuContext
->
psObcData
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
gpuContext
->
psObcData
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
gpuContext
->
psBornForce
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
gpuContext
->
psBornForce
,
log
);
(
void
)
fprintf
(
log
,
"
\n\n
"
);
(
void
)
fprintf
(
log
,
"
\n\n
"
);
(
void
)
fprintf
(
log
,
" amoebaBonds %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaBonds
);
(
void
)
fprintf
(
log
,
" amoebaBonds %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaBonds
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_1
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_1
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_2
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_2
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_3
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_3
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_4
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_3_4
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_1_1
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_1_1
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_1_2
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkArray_1_2
,
log
);
(
void
)
fprintf
(
log
,
"
\n\n
"
);
(
void
)
fprintf
(
log
,
"
\n\n
"
);
gpuPrintCudaStreamUnsignedInt
(
amoebaGpu
->
psWorkUnit
,
log
);
totalMemory
+=
gpuPrintCudaStreamUnsignedInt
(
amoebaGpu
->
psWorkUnit
,
log
);
gpuPrintCudaStreamInt
(
amoebaGpu
->
psScalingIndicesIndex
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt
(
amoebaGpu
->
psScalingIndicesIndex
,
log
);
gpuPrintCudaStreamInt
(
amoebaGpu
->
ps_D_ScaleIndices
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt
(
amoebaGpu
->
ps_D_ScaleIndices
,
log
);
gpuPrintCudaStreamInt2
(
amoebaGpu
->
ps_P_ScaleIndices
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt2
(
amoebaGpu
->
ps_P_ScaleIndices
,
log
);
gpuPrintCudaStreamInt2
(
amoebaGpu
->
ps_M_ScaleIndices
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt2
(
amoebaGpu
->
ps_M_ScaleIndices
,
log
);
if
(
amoebaGpu
->
psAmoebaBondParameter
)(
void
)
fprintf
(
log
,
"
\n
"
);
if
(
amoebaGpu
->
psAmoebaBondParameter
)(
void
)
fprintf
(
log
,
"
\n
"
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaBondID
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaBondID
,
log
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaBondParameter
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaBondParameter
,
log
);
(
void
)
fprintf
(
log
,
" amoebaBonds %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaBonds
);
(
void
)
fprintf
(
log
,
" amoebaBonds %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaBonds
);
(
void
)
fprintf
(
log
,
" amoebaBond_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaBond_offset
);
(
void
)
fprintf
(
log
,
" amoebaBond_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaBond_offset
);
(
void
)
fprintf
(
log
,
" cubic %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaBondCubicParameter
);
(
void
)
fprintf
(
log
,
" cubic %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaBondCubicParameter
);
...
@@ -239,9 +248,9 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -239,9 +248,9 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pAmoebaBondID %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaBondID
);
(
void
)
fprintf
(
log
,
" pAmoebaBondID %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaBondID
);
(
void
)
fprintf
(
log
,
" pAmoebaBondParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaBondParameter
);
(
void
)
fprintf
(
log
,
" pAmoebaBondParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaBondParameter
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaAngleID1
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaAngleID1
,
log
);
gpuPrintCudaStreamInt2
(
amoebaGpu
->
psAmoebaAngleID2
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt2
(
amoebaGpu
->
psAmoebaAngleID2
,
log
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaAngleParameter
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaAngleParameter
,
log
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
" amoebaAngles %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaAngles
);
(
void
)
fprintf
(
log
,
" amoebaAngles %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaAngles
);
(
void
)
fprintf
(
log
,
" amoebaAngle_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaAngle_offset
);
(
void
)
fprintf
(
log
,
" amoebaAngle_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaAngle_offset
);
...
@@ -254,9 +263,9 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -254,9 +263,9 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pAmoebaAngleParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaAngleParameter
);
(
void
)
fprintf
(
log
,
" pAmoebaAngleParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaAngleParameter
);
if
(
amoebaGpu
->
psAmoebaInPlaneAngleID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
if
(
amoebaGpu
->
psAmoebaInPlaneAngleID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaInPlaneAngleID1
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaInPlaneAngleID1
,
log
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaInPlaneAngleID2
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaInPlaneAngleID2
,
log
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaInPlaneAngleParameter
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaInPlaneAngleParameter
,
log
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
" amoebaInPlaneAngles %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaInPlaneAngles
);
(
void
)
fprintf
(
log
,
" amoebaInPlaneAngles %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaInPlaneAngles
);
(
void
)
fprintf
(
log
,
" amoebaInPlaneAngle_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaInPlaneAngle_offset
);
(
void
)
fprintf
(
log
,
" amoebaInPlaneAngle_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaInPlaneAngle_offset
);
...
@@ -270,10 +279,10 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -270,10 +279,10 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
if
(
amoebaGpu
->
psAmoebaTorsionID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
if
(
amoebaGpu
->
psAmoebaTorsionID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionID1
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionID1
,
log
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionID2
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionID2
,
log
);
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
psAmoebaTorsionParameter1
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
psAmoebaTorsionParameter1
,
log
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaTorsionParameter2
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaTorsionParameter2
,
log
);
(
void
)
fprintf
(
log
,
" amoebaTorsions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaTorsions
);
(
void
)
fprintf
(
log
,
" amoebaTorsions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaTorsions
);
(
void
)
fprintf
(
log
,
" amoebaTorsion_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaTorsion_offset
);
(
void
)
fprintf
(
log
,
" amoebaTorsion_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaTorsion_offset
);
(
void
)
fprintf
(
log
,
" pAmoebaTorsionID1 %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaTorsionID1
);
(
void
)
fprintf
(
log
,
" pAmoebaTorsionID1 %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaTorsionID1
);
...
@@ -282,10 +291,10 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -282,10 +291,10 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pAmoebaTorsionParameter2 %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaTorsionParameter2
);
(
void
)
fprintf
(
log
,
" pAmoebaTorsionParameter2 %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaTorsionParameter2
);
if
(
amoebaGpu
->
psAmoebaPiTorsionID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
if
(
amoebaGpu
->
psAmoebaPiTorsionID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaPiTorsionID1
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaPiTorsionID1
,
log
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaPiTorsionID2
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaPiTorsionID2
,
log
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaPiTorsionID3
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaPiTorsionID3
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psAmoebaPiTorsionParameter
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psAmoebaPiTorsionParameter
,
log
);
(
void
)
fprintf
(
log
,
" amoebaPiTorsions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaPiTorsions
);
(
void
)
fprintf
(
log
,
" amoebaPiTorsions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaPiTorsions
);
(
void
)
fprintf
(
log
,
" amoebaPiTorsion_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaPiTorsion_offset
);
(
void
)
fprintf
(
log
,
" amoebaPiTorsion_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaPiTorsion_offset
);
...
@@ -295,9 +304,9 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -295,9 +304,9 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pAmoebaPiTorsionParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaPiTorsionParameter
);
(
void
)
fprintf
(
log
,
" pAmoebaPiTorsionParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaPiTorsionParameter
);
if
(
amoebaGpu
->
psAmoebaStretchBendID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
if
(
amoebaGpu
->
psAmoebaStretchBendID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaStretchBendID1
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaStretchBendID1
,
log
);
gpuPrintCudaStreamInt2
(
amoebaGpu
->
psAmoebaStretchBendID2
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt2
(
amoebaGpu
->
psAmoebaStretchBendID2
,
log
);
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
psAmoebaStretchBendParameter
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
psAmoebaStretchBendParameter
,
log
);
(
void
)
fprintf
(
log
,
" amoebaStretchBend %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaStretchBends
);
(
void
)
fprintf
(
log
,
" amoebaStretchBend %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaStretchBends
);
(
void
)
fprintf
(
log
,
" amoebaStretchBend_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaStretchBend_offset
);
(
void
)
fprintf
(
log
,
" amoebaStretchBend_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaStretchBend_offset
);
(
void
)
fprintf
(
log
,
" pAmoebaStretchBendID1 %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaStretchBendID1
);
(
void
)
fprintf
(
log
,
" pAmoebaStretchBendID1 %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaStretchBendID1
);
...
@@ -305,9 +314,9 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -305,9 +314,9 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pAmoebaStretchBendParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaStretchBendParameter
);
(
void
)
fprintf
(
log
,
" pAmoebaStretchBendParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaStretchBendParameter
);
if
(
amoebaGpu
->
psAmoebaOutOfPlaneBendID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
if
(
amoebaGpu
->
psAmoebaOutOfPlaneBendID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaOutOfPlaneBendID1
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaOutOfPlaneBendID1
,
log
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaOutOfPlaneBendID2
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaOutOfPlaneBendID2
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psAmoebaOutOfPlaneBendParameter
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psAmoebaOutOfPlaneBendParameter
,
log
);
(
void
)
fprintf
(
log
,
" amoebaOutOfPlaneBend %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaOutOfPlaneBends
);
(
void
)
fprintf
(
log
,
" amoebaOutOfPlaneBend %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaOutOfPlaneBends
);
(
void
)
fprintf
(
log
,
" amoebaOutOfPlaneBend_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaOutOfPlaneBend_offset
);
(
void
)
fprintf
(
log
,
" amoebaOutOfPlaneBend_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaOutOfPlaneBend_offset
);
(
void
)
fprintf
(
log
,
" amoebaOutOfPlaneBendCubicK %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaOutOfPlaneBendCubicK
);
(
void
)
fprintf
(
log
,
" amoebaOutOfPlaneBendCubicK %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaOutOfPlaneBendCubicK
);
...
@@ -319,10 +328,10 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -319,10 +328,10 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pAmoebaOutOfPlaneBendParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaOutOfPlaneBendParameter
);
(
void
)
fprintf
(
log
,
" pAmoebaOutOfPlaneBendParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaOutOfPlaneBendParameter
);
if
(
amoebaGpu
->
psAmoebaTorsionTorsionID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
if
(
amoebaGpu
->
psAmoebaTorsionTorsionID1
)(
void
)
fprintf
(
log
,
"
\n
"
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionTorsionID1
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionTorsionID1
,
log
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionTorsionID2
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionTorsionID2
,
log
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionTorsionID3
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaTorsionTorsionID3
,
log
);
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
psAmoebaTorsionTorsionGrids
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
psAmoebaTorsionTorsionGrids
,
log
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
" amoebaTorsionTorsions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaTorsionTorsions
);
(
void
)
fprintf
(
log
,
" amoebaTorsionTorsions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaTorsionTorsions
);
(
void
)
fprintf
(
log
,
" amoebaTorsionTorsion_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaTorsionTorsion_offset
);
(
void
)
fprintf
(
log
,
" amoebaTorsionTorsion_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaTorsionTorsion_offset
);
...
@@ -333,8 +342,8 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -333,8 +342,8 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pOutputBufferCounter %p
\n
"
,
amoebaGpu
->
gpuContext
->
pOutputBufferCounter
);
(
void
)
fprintf
(
log
,
" pOutputBufferCounter %p
\n
"
,
amoebaGpu
->
gpuContext
->
pOutputBufferCounter
);
if
(
amoebaGpu
->
psAmoebaUreyBradleyParameter
)(
void
)
fprintf
(
log
,
"
\n
"
);
if
(
amoebaGpu
->
psAmoebaUreyBradleyParameter
)(
void
)
fprintf
(
log
,
"
\n
"
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaUreyBradleyID
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaUreyBradleyID
,
log
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaUreyBradleyParameter
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psAmoebaUreyBradleyParameter
,
log
);
(
void
)
fprintf
(
log
,
" amoebaUreyBradleys %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaUreyBradleys
);
(
void
)
fprintf
(
log
,
" amoebaUreyBradleys %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaUreyBradleys
);
(
void
)
fprintf
(
log
,
" amoebaUreyBradley_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaUreyBradley_offset
);
(
void
)
fprintf
(
log
,
" amoebaUreyBradley_offset %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaUreyBradley_offset
);
(
void
)
fprintf
(
log
,
" cubic %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaUreyBradleyCubicParameter
);
(
void
)
fprintf
(
log
,
" cubic %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaUreyBradleyCubicParameter
);
...
@@ -343,20 +352,26 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -343,20 +352,26 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pAmoebaUreyBradleyParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaUreyBradleyParameter
);
(
void
)
fprintf
(
log
,
" pAmoebaUreyBradleyParameter %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaUreyBradleyParameter
);
(
void
)
fprintf
(
log
,
"
\n\n
"
);
(
void
)
fprintf
(
log
,
"
\n\n
"
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
,
log
);
(
void
)
fprintf
(
log
,
" pMultipoleParticlesIdsAndAxisType %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pMultipoleParticlesIdsAndAxisType
);
(
void
)
fprintf
(
log
,
" pMultipoleParticlesIdsAndAxisType %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pMultipoleParticlesIdsAndAxisType
);
(
void
)
fprintf
(
log
,
" maxTorqueBufferIndex %d
\n
"
,
amoebaGpu
->
maxTorqueBufferIndex
);
(
void
)
fprintf
(
log
,
" maxTorqueBufferIndex %d
\n
"
,
amoebaGpu
->
maxTorqueBufferIndex
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
,
log
);
int
memory
=
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
psTorqueMapForce4
,
log
);
if
(
amoebaGpu
->
torqueMapForce4Delete
)
totalMemory
+=
memory
;
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psTorque
,
log
);
(
void
)
fprintf
(
log
,
" psMultipoleParticlesTorqueBufferIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pMultipoleParticlesTorqueBufferIndices
);
(
void
)
fprintf
(
log
,
" psMultipoleParticlesTorqueBufferIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pMultipoleParticlesTorqueBufferIndices
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psMolecularDipole
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psMolecularDipole
,
log
);
(
void
)
fprintf
(
log
,
" pMolecularDipole %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pMolecularDipole
);
(
void
)
fprintf
(
log
,
" pMolecularDipole %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pMolecularDipole
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psMolecularQuadrupole
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psMolecularQuadrupole
,
log
);
(
void
)
fprintf
(
log
,
" pMolecularQuadrupole %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pMolecularQuadrupole
);
(
void
)
fprintf
(
log
,
" pMolecularQuadrupole %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pMolecularQuadrupole
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psLabFrameDipole
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psLabFrameDipole
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psLabFrameQuadrupole
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psLabFrameQuadrupole
,
log
);
(
void
)
fprintf
(
log
,
" polarizationType %d
\n
"
,
amoebaGpu
->
amoebaSim
.
polarizationType
);
(
void
)
fprintf
(
log
,
" polarizationType %d
\n
"
,
amoebaGpu
->
amoebaSim
.
polarizationType
);
(
void
)
fprintf
(
log
,
" maxCovalentDegreeSz %d
\n
"
,
amoebaGpu
->
maxCovalentDegreeSz
);
(
void
)
fprintf
(
log
,
" maxCovalentDegreeSz %d
\n
"
,
amoebaGpu
->
maxCovalentDegreeSz
);
...
@@ -380,15 +395,11 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -380,15 +395,11 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" fd %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
fd
);
(
void
)
fprintf
(
log
,
" fd %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
fd
);
(
void
)
fprintf
(
log
,
" fq %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
fq
);
(
void
)
fprintf
(
log
,
" fq %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
fq
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psDampingFactorAndThole
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psDampingFactorAndThole
,
log
);
gpuPrintCudaStreamInt
(
amoebaGpu
->
psCovalentDegree
,
log
);
gpuPrintCudaStreamInt
(
amoebaGpu
->
psPolarizationDegree
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psE_Field
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psE_FieldPolar
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psPolarizability
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psE_Field
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psE_FieldPolar
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psPolarizability
,
log
);
(
void
)
fprintf
(
log
,
" mutualInducedIterativeMethod %d
\n
"
,
amoebaGpu
->
mutualInducedIterativeMethod
);
(
void
)
fprintf
(
log
,
" mutualInducedIterativeMethod %d
\n
"
,
amoebaGpu
->
mutualInducedIterativeMethod
);
(
void
)
fprintf
(
log
,
" mutualInducedMaxIterations %d
\n
"
,
amoebaGpu
->
mutualInducedMaxIterations
);
(
void
)
fprintf
(
log
,
" mutualInducedMaxIterations %d
\n
"
,
amoebaGpu
->
mutualInducedMaxIterations
);
...
@@ -396,19 +407,22 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -396,19 +407,22 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" mutualInducedTargetEpsilon %10.3e
\n
"
,
amoebaGpu
->
mutualInducedTargetEpsilon
);
(
void
)
fprintf
(
log
,
" mutualInducedTargetEpsilon %10.3e
\n
"
,
amoebaGpu
->
mutualInducedTargetEpsilon
);
(
void
)
fprintf
(
log
,
" mutualInducedCurrentEpsilon %10.3e
\n
"
,
amoebaGpu
->
mutualInducedCurrentEpsilon
);
(
void
)
fprintf
(
log
,
" mutualInducedCurrentEpsilon %10.3e
\n
"
,
amoebaGpu
->
mutualInducedCurrentEpsilon
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipole
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipole
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipolePolar
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipolePolar
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psCurrentEpsilon
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psCurrentEpsilon
,
log
);
(
void
)
fprintf
(
log
,
" numberOfSorWorkVectors %u
\n
"
,
amoebaGpu
->
numberOfSorWorkVectors
);
(
void
)
fprintf
(
log
,
" numberOfSorWorkVectors %u
\n
"
,
amoebaGpu
->
numberOfSorWorkVectors
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkVector
[
0
],
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkVector
[
0
],
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psTorque
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkVector
[
1
],
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkVector
[
2
],
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psGk_Field
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psWorkVector
[
3
],
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipoleS
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psTorque
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipolePolarS
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psBorn
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psGk_Field
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psBornPolar
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipoleS
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psInducedDipolePolarS
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psBorn
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psBornPolar
,
log
);
(
void
)
fprintf
(
log
,
" includeObcCavityTerm %d
\n
"
,
amoebaGpu
->
includeObcCavityTerm
);
(
void
)
fprintf
(
log
,
" includeObcCavityTerm %d
\n
"
,
amoebaGpu
->
includeObcCavityTerm
);
(
void
)
fprintf
(
log
,
" dielectricOffset %15.7e
\n
"
,
gpu
->
sim
.
dielectricOffset
);
(
void
)
fprintf
(
log
,
" dielectricOffset %15.7e
\n
"
,
gpu
->
sim
.
dielectricOffset
);
(
void
)
fprintf
(
log
,
" probeRadius %15.7e
\n
"
,
gpu
->
sim
.
probeRadius
);
(
void
)
fprintf
(
log
,
" probeRadius %15.7e
\n
"
,
gpu
->
sim
.
probeRadius
);
...
@@ -420,14 +434,14 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -420,14 +434,14 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" vdwEpsilonCombiningRule %d
\n
"
,
amoebaGpu
->
vdwEpsilonCombiningRule
);
(
void
)
fprintf
(
log
,
" vdwEpsilonCombiningRule %d
\n
"
,
amoebaGpu
->
vdwEpsilonCombiningRule
);
(
void
)
fprintf
(
log
,
" vdwUsePBC %d
\n
"
,
amoebaGpu
->
amoebaSim
.
vdwUsePBC
);
(
void
)
fprintf
(
log
,
" vdwUsePBC %d
\n
"
,
amoebaGpu
->
amoebaSim
.
vdwUsePBC
);
(
void
)
fprintf
(
log
,
" vdwCutoff2 %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
vdwCutoff2
);
(
void
)
fprintf
(
log
,
" vdwCutoff2 %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
vdwCutoff2
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psVdwSigmaEpsilon
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psVdwSigmaEpsilon
,
log
);
gpuPrintCudaStreamInt
(
amoebaGpu
->
psAmoebaVdwNonReductionID
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt
(
amoebaGpu
->
psAmoebaVdwNonReductionID
,
log
);
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaVdwReductionID
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt4
(
amoebaGpu
->
psAmoebaVdwReductionID
,
log
);
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psAmoebaVdwReduction
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat
(
amoebaGpu
->
psAmoebaVdwReduction
,
log
);
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
psAmoebaVdwCoordinates
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat4
(
amoebaGpu
->
psAmoebaVdwCoordinates
,
log
);
gpuPrintCudaStreamUnsignedInt
(
amoebaGpu
->
psVdwWorkUnit
,
log
);
totalMemory
+=
gpuPrintCudaStreamUnsignedInt
(
amoebaGpu
->
psVdwWorkUnit
,
log
);
gpuPrintCudaStreamInt
(
amoebaGpu
->
psVdwExclusionIndicesIndex
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt
(
amoebaGpu
->
psVdwExclusionIndicesIndex
,
log
);
gpuPrintCudaStreamInt
(
amoebaGpu
->
psVdwExclusionIndices
,
log
);
totalMemory
+=
gpuPrintCudaStreamInt
(
amoebaGpu
->
psVdwExclusionIndices
,
log
);
(
void
)
fprintf
(
log
,
" amoebaVdwNonReductions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaVdwNonReductions
);
(
void
)
fprintf
(
log
,
" amoebaVdwNonReductions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaVdwNonReductions
);
(
void
)
fprintf
(
log
,
" pAmoebaVdwNonReductionID %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaVdwNonReductionID
);
(
void
)
fprintf
(
log
,
" pAmoebaVdwNonReductionID %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pAmoebaVdwNonReductionID
);
(
void
)
fprintf
(
log
,
" amoebaVdwReductions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaVdwReductions
);
(
void
)
fprintf
(
log
,
" amoebaVdwReductions %u
\n
"
,
amoebaGpu
->
amoebaSim
.
amoebaVdwReductions
);
...
@@ -436,7 +450,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -436,7 +450,7 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" pVdwExclusionIndicesIndex %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pVdwExclusionIndicesIndex
);
(
void
)
fprintf
(
log
,
" pVdwExclusionIndicesIndex %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pVdwExclusionIndicesIndex
);
(
void
)
fprintf
(
log
,
" pVdwExclusionIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pVdwExclusionIndices
);
(
void
)
fprintf
(
log
,
" pVdwExclusionIndices %p
\n
"
,
amoebaGpu
->
amoebaSim
.
pVdwExclusionIndices
);
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psWcaDispersionRadiusEpsilon
,
log
);
totalMemory
+=
gpuPrintCudaStreamFloat2
(
amoebaGpu
->
psWcaDispersionRadiusEpsilon
,
log
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
"
\n
"
);
(
void
)
fprintf
(
log
,
" epso %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
epso
);
(
void
)
fprintf
(
log
,
" epso %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
epso
);
(
void
)
fprintf
(
log
,
" epsh %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
epsh
);
(
void
)
fprintf
(
log
,
" epsh %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
epsh
);
...
@@ -447,6 +461,8 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
...
@@ -447,6 +461,8 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(
void
)
fprintf
(
log
,
" dispoff %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
dispoff
);
(
void
)
fprintf
(
log
,
" dispoff %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
dispoff
);
(
void
)
fprintf
(
log
,
" totalMaxWcaDispersionEnergy %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
totalMaxWcaDispersionEnergy
);
(
void
)
fprintf
(
log
,
" totalMaxWcaDispersionEnergy %15.7e
\n
"
,
amoebaGpu
->
amoebaSim
.
totalMaxWcaDispersionEnergy
);
(
void
)
fprintf
(
log
,
" total array memory %d
\n
"
,
totalMemory
);
(
void
)
fflush
(
log
);
(
void
)
fflush
(
log
);
}
}
...
@@ -1349,20 +1365,14 @@ static void gpuFixedEFieldAllocate( amoebaGpuContext amoebaGpu )
...
@@ -1349,20 +1365,14 @@ static void gpuFixedEFieldAllocate( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psDampingFactorAndThole
=
new
CUDAStream
<
float2
>
(
paddedNumberOfAtoms
,
1
,
"DampingFactorAndThole"
);
amoebaGpu
->
psDampingFactorAndThole
=
new
CUDAStream
<
float2
>
(
paddedNumberOfAtoms
,
1
,
"DampingFactorAndThole"
);
amoebaGpu
->
amoebaSim
.
pDampingFactorAndThole
=
amoebaGpu
->
psDampingFactorAndThole
->
_pDevData
;
amoebaGpu
->
amoebaSim
.
pDampingFactorAndThole
=
amoebaGpu
->
psDampingFactorAndThole
->
_pDevData
;
amoebaGpu
->
psC
ovalentDegree
=
new
CUDAStream
<
int
>
(
amoebaGpu
->
maxCovalentDegreeSz
*
paddedNumberOfAtoms
,
1
,
"CovalentDegree"
);
amoebaGpu
->
c
ovalentDegree
.
resize
(
amoebaGpu
->
maxCovalentDegreeSz
*
paddedNumberOfAtoms
,
0
);
amoebaGpu
->
p
sP
olarizationDegree
=
new
CUDAStream
<
int
>
(
amoebaGpu
->
maxCovalentDegreeSz
*
paddedNumberOfAtoms
,
1
,
"PolarizationDegree"
);
amoebaGpu
->
polarizationDegree
.
resize
(
amoebaGpu
->
maxCovalentDegreeSz
*
paddedNumberOfAtoms
,
0
);
unsigned
int
offset
=
paddedNumberOfAtoms
*
sizeof
(
float
);
unsigned
int
offset
=
paddedNumberOfAtoms
*
sizeof
(
float
);
memset
(
amoebaGpu
->
psDampingFactorAndThole
->
_pSysData
,
0
,
2
*
offset
);
memset
(
amoebaGpu
->
psDampingFactorAndThole
->
_pSysData
,
0
,
2
*
offset
);
//memset( amoebaGpu->psE_Field->_pSysData, 0, offset*3 );
//memset( amoebaGpu->psE_Field->_pSysData, 0, offset*3 );
//memset( amoebaGpu->psE_FieldPolar->_pSysData, 0, offset*3 );
//memset( amoebaGpu->psE_FieldPolar->_pSysData, 0, offset*3 );
// should be removed XXXXX
offset
=
amoebaGpu
->
maxCovalentDegreeSz
*
paddedNumberOfAtoms
*
sizeof
(
int
);
memset
(
amoebaGpu
->
psCovalentDegree
->
_pSysData
,
0
,
offset
);
memset
(
amoebaGpu
->
psPolarizationDegree
->
_pSysData
,
0
,
offset
);
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -1664,9 +1674,9 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1664,9 +1674,9 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// psCovalentDegree & psPolarizationDegree are arrays of size maxCovalentDegreeSz*paddedNumberOfAtoms
// psCovalentDegree & psPolarizationDegree are arrays of size maxCovalentDegreeSz*paddedNumberOfAtoms
const
int
particlesOffset
=
ii
*
amoebaGpu
->
maxCovalentDegreeSz
;
const
int
particlesOffset
=
ii
*
amoebaGpu
->
maxCovalentDegreeSz
;
const
int
minCovalentIndex
=
minCovalentIndices
[
ii
];
const
int
minCovalentIndex
=
minCovalentIndices
[
ii
];
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
]
=
minCovalentIndex
;
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
]
=
minCovalentIndex
;
// covalent info
// covalent info
...
@@ -1680,7 +1690,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1680,7 +1690,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
") is out of range -- maxCovalentDegreeSz needs to be increased."
<<
std
::
endl
;
") is out of range -- maxCovalentDegreeSz needs to be increased."
<<
std
::
endl
;
errorCount
++
;
errorCount
++
;
}
else
{
}
else
{
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
+
covalentIndex
]
=
covalentDegree
[
jj
]
+
1
;
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
+
covalentIndex
]
=
covalentDegree
[
jj
]
+
1
;
}
}
}
}
}
}
...
@@ -1688,7 +1698,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1688,7 +1698,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// polarization covalent info
// polarization covalent info
const
int
minCovalentPolarizationIndex
=
minCovalentPolarizationIndices
[
ii
];
const
int
minCovalentPolarizationIndex
=
minCovalentPolarizationIndices
[
ii
];
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
]
=
minCovalentPolarizationIndex
;
amoebaGpu
->
polarizationDegree
[
particlesOffset
]
=
minCovalentPolarizationIndex
;
for
(
unsigned
int
jj
=
4
;
jj
<
covalentInfo
.
size
();
jj
++
){
for
(
unsigned
int
jj
=
4
;
jj
<
covalentInfo
.
size
();
jj
++
){
const
std
::
vector
<
int
>
covalentList
=
covalentInfo
[
jj
];
const
std
::
vector
<
int
>
covalentList
=
covalentInfo
[
jj
];
...
@@ -1699,7 +1709,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1699,7 +1709,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
") is out of range -- maxCovalentDegreeSz needs to be increased."
<<
std
::
endl
;
") is out of range -- maxCovalentDegreeSz needs to be increased."
<<
std
::
endl
;
errorCount
++
;
errorCount
++
;
}
else
{
}
else
{
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
+
covalentIndex
]
=
covalentDegree
[
jj
]
+
1
;
amoebaGpu
->
polarizationDegree
[
particlesOffset
+
covalentIndex
]
=
covalentDegree
[
jj
]
+
1
;
}
}
}
}
}
}
...
@@ -1739,7 +1749,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1739,7 +1749,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// covalent/polarization degree
// covalent/polarization degree
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%3d covalent/polarization degree: minIdx[%6d %6d] Thole=%12.5f dampingFactor=%12.5f
\n
"
,
ii
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%3d covalent/polarization degree: minIdx[%6d %6d] Thole=%12.5f dampingFactor=%12.5f
\n
"
,
ii
,
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
],
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
],
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
],
amoebaGpu
->
polarizationDegree
[
particlesOffset
],
amoebaGpu
->
psDampingFactorAndThole
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psDampingFactorAndThole
->
_pSysData
[
ii
].
x
);
amoebaGpu
->
psDampingFactorAndThole
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psDampingFactorAndThole
->
_pSysData
[
ii
].
x
);
// covalent
// covalent
...
@@ -1752,20 +1762,20 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1752,20 +1762,20 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
int
count
=
0
;
int
count
=
0
;
for
(
int
jj
=
1
;
jj
<
amoebaGpu
->
maxCovalentDegreeSz
;
jj
++
){
for
(
int
jj
=
1
;
jj
<
amoebaGpu
->
maxCovalentDegreeSz
;
jj
++
){
if
(
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
+
jj
]
==
kk
){
if
(
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
+
jj
]
==
kk
){
if
(
count
==
0
){
if
(
count
==
0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%d ["
,
kk
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%d ["
,
kk
);
}
}
float
pScale
=
polarScale
[
kk
-
1
];
float
pScale
=
polarScale
[
kk
-
1
];
int
particle2Index
=
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
]
+
jj
-
1
;
int
particle2Index
=
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
]
+
jj
-
1
;
if
(
kk
==
4
&&
particle2Index
>=
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
]
){
if
(
kk
==
4
&&
particle2Index
>=
amoebaGpu
->
polarizationDegree
[
particlesOffset
]
){
int
particle2Offset
=
particle2Index
-
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
]
+
1
;
int
particle2Offset
=
particle2Index
-
amoebaGpu
->
polarizationDegree
[
particlesOffset
]
+
1
;
if
(
particle2Offset
<
amoebaGpu
->
maxCovalentDegreeSz
&&
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
+
particle2Offset
]
==
1
){
if
(
particle2Offset
<
amoebaGpu
->
maxCovalentDegreeSz
&&
amoebaGpu
->
polarizationDegree
[
particlesOffset
+
particle2Offset
]
==
1
){
pScale
*=
0.5
;
pScale
*=
0.5
;
}
}
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5.1f "
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d %5.1f "
,
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
]
+
jj
-
1
,
pScale
);
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
]
+
jj
-
1
,
pScale
);
count
++
;
count
++
;
}
}
}
}
...
@@ -1782,11 +1792,11 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1782,11 +1792,11 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
int
count
=
0
;
int
count
=
0
;
for
(
int
jj
=
1
;
jj
<
amoebaGpu
->
maxCovalentDegreeSz
;
jj
++
){
for
(
int
jj
=
1
;
jj
<
amoebaGpu
->
maxCovalentDegreeSz
;
jj
++
){
if
(
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
+
jj
]
==
kk
){
if
(
amoebaGpu
->
polarizationDegree
[
particlesOffset
+
jj
]
==
kk
){
if
(
count
==
0
){
if
(
count
==
0
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%d ["
,
kk
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%d ["
,
kk
);
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
]
+
jj
-
1
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
amoebaGpu
->
polarizationDegree
[
particlesOffset
]
+
jj
-
1
);
count
++
;
count
++
;
}
}
}
}
...
@@ -1818,19 +1828,19 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1818,19 +1828,19 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// print entries w/ degree=kk
// print entries w/ degree=kk
for
(
int
jj
=
1
;
jj
<
amoebaGpu
->
maxCovalentDegreeSz
;
jj
++
){
for
(
int
jj
=
1
;
jj
<
amoebaGpu
->
maxCovalentDegreeSz
;
jj
++
){
if
(
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
+
jj
]
){
if
(
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
+
jj
]
){
int
index
=
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
+
jj
];
int
index
=
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
+
jj
];
float
pScale
=
polarScale
[
index
-
1
];
float
pScale
=
polarScale
[
index
-
1
];
float
mScale
=
mpoleScale
[
index
-
1
];
float
mScale
=
mpoleScale
[
index
-
1
];
int
particle2Index
=
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
]
+
jj
-
1
;
int
particle2Index
=
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
]
+
jj
-
1
;
if
(
index
==
4
&&
particle2Index
>=
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
]
){
if
(
index
==
4
&&
particle2Index
>=
amoebaGpu
->
polarizationDegree
[
particlesOffset
]
){
int
particle2Offset
=
particle2Index
-
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
]
+
1
;
int
particle2Offset
=
particle2Index
-
amoebaGpu
->
polarizationDegree
[
particlesOffset
]
+
1
;
if
(
particle2Offset
<
amoebaGpu
->
maxCovalentDegreeSz
&&
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
+
particle2Offset
]
==
1
){
if
(
particle2Offset
<
amoebaGpu
->
maxCovalentDegreeSz
&&
amoebaGpu
->
polarizationDegree
[
particlesOffset
+
particle2Offset
]
==
1
){
pScale
*=
0.5
;
pScale
*=
0.5
;
}
}
}
}
pScaleCheckSum
[
ii
]
+=
(
pScale
-
1.0
f
);
pScaleCheckSum
[
ii
]
+=
(
pScale
-
1.0
f
);
int
covIndex
=
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
];
int
covIndex
=
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
];
if
(
pScale
!=
1.0
f
){
if
(
pScale
!=
1.0
f
){
MapIntFloat
*
pMap
=
amoebaGpu
->
pMapArray
[
ii
];
MapIntFloat
*
pMap
=
amoebaGpu
->
pMapArray
[
ii
];
(
*
pMap
)[
covIndex
+
jj
-
1
]
=
pScale
;
(
*
pMap
)[
covIndex
+
jj
-
1
]
=
pScale
;
...
@@ -1841,10 +1851,10 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1841,10 +1851,10 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// polarization
// polarization
for
(
int
jj
=
1
;
jj
<
amoebaGpu
->
maxCovalentDegreeSz
;
jj
++
){
for
(
int
jj
=
1
;
jj
<
amoebaGpu
->
maxCovalentDegreeSz
;
jj
++
){
if
(
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
+
jj
]
){
if
(
amoebaGpu
->
polarizationDegree
[
particlesOffset
+
jj
]
){
int
index
=
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
+
jj
];
int
index
=
amoebaGpu
->
polarizationDegree
[
particlesOffset
+
jj
];
dScaleCheckSum
[
ii
]
+=
(
directScale
[
index
-
1
]
-
1.0
f
);
dScaleCheckSum
[
ii
]
+=
(
directScale
[
index
-
1
]
-
1.0
f
);
int
covIndex
=
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
];
int
covIndex
=
amoebaGpu
->
polarizationDegree
[
particlesOffset
];
if
(
directScale
[
index
-
1
]
!=
1.0
f
){
if
(
directScale
[
index
-
1
]
!=
1.0
f
){
MapIntFloat
*
dMap
=
amoebaGpu
->
dMapArray
[
ii
];
MapIntFloat
*
dMap
=
amoebaGpu
->
dMapArray
[
ii
];
(
*
dMap
)[
covIndex
+
jj
-
1
]
=
directScale
[
index
-
1
];
(
*
dMap
)[
covIndex
+
jj
-
1
]
=
directScale
[
index
-
1
];
...
@@ -1872,8 +1882,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
...
@@ -1872,8 +1882,6 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
Upload
();
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
Upload
();
amoebaGpu
->
psMolecularDipole
->
Upload
();
amoebaGpu
->
psMolecularDipole
->
Upload
();
amoebaGpu
->
psMolecularQuadrupole
->
Upload
();
amoebaGpu
->
psMolecularQuadrupole
->
Upload
();
amoebaGpu
->
psCovalentDegree
->
Upload
();
amoebaGpu
->
psPolarizationDegree
->
Upload
();
amoebaGpu
->
psDampingFactorAndThole
->
Upload
();
amoebaGpu
->
psDampingFactorAndThole
->
Upload
();
amoebaGpu
->
psPolarizability
->
Upload
();
amoebaGpu
->
psPolarizability
->
Upload
();
amoebaGpu
->
gpuContext
->
psPosq4
->
Upload
();
amoebaGpu
->
gpuContext
->
psPosq4
->
Upload
();
...
@@ -2652,21 +2660,27 @@ void amoebaGpuShutDown(amoebaGpuContext gpu)
...
@@ -2652,21 +2660,27 @@ void amoebaGpuShutDown(amoebaGpuContext gpu)
delete
gpu
->
psMolecularDipole
;
delete
gpu
->
psMolecularDipole
;
delete
gpu
->
psMolecularQuadrupole
;
delete
gpu
->
psMolecularQuadrupole
;
delete
gpu
->
psLabFrameDipole
;
delete
gpu
->
psLabFrameDipole
;
delete
gpu
->
psLabFrameQuadrupole
;
delete
gpu
->
psLabFrameQuadrupole
;
delete
gpu
->
psDampingFactorAndThole
;
delete
gpu
->
psDampingFactorAndThole
;
delete
gpu
->
psCovalentDegree
;
delete
gpu
->
psPolarizationDegree
;
delete
gpu
->
psE_Field
;
delete
gpu
->
psE_Field
;
delete
gpu
->
psE_FieldPolar
;
delete
gpu
->
psE_FieldPolar
;
delete
gpu
->
psInducedDipole
;
delete
gpu
->
psInducedDipole
;
delete
gpu
->
psInducedDipolePolar
;
delete
gpu
->
psInducedDipolePolar
;
delete
gpu
->
psPolarizability
;
delete
gpu
->
psPolarizability
;
delete
gpu
->
psCurrentEpsilon
;
delete
gpu
->
psCurrentEpsilon
;
delete
gpu
->
psWorkVector
[
0
];
delete
gpu
->
psWorkVector
[
0
];
delete
gpu
->
psWorkVector
[
1
];
delete
gpu
->
psWorkVector
[
1
];
delete
gpu
->
psWorkVector
[
2
];
delete
gpu
->
psWorkVector
[
2
];
delete
gpu
->
psWorkVector
[
3
];
delete
gpu
->
psWorkVector
[
3
];
delete
gpu
->
psTorque
;
delete
gpu
->
psTorque
;
delete
gpu
->
psGk_Field
;
delete
gpu
->
psGk_Field
;
...
@@ -2773,6 +2787,7 @@ void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu, int hasAmoebaGener
...
@@ -2773,6 +2787,7 @@ void amoebaGpuBuildOutputBuffers( amoebaGpuContext amoebaGpu, int hasAmoebaGener
amoebaGpu
->
amoebaSim
.
pWorkArray_3_2
=
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
;
amoebaGpu
->
amoebaSim
.
pWorkArray_3_2
=
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
;
// used in GK calculations
// used in GK calculations
if
(
hasAmoebaGeneralizedKirkwood
)
if
(
hasAmoebaGeneralizedKirkwood
)
{
{
if
(
amoebaGpu
->
psWorkArray_3_3
)
if
(
amoebaGpu
->
psWorkArray_3_3
)
...
@@ -2810,19 +2825,19 @@ static void getScalingDegrees( amoebaGpuContext amoebaGpu, unsigned int particle
...
@@ -2810,19 +2825,19 @@ static void getScalingDegrees( amoebaGpuContext amoebaGpu, unsigned int particle
{
{
int
particlesOffset
=
particleI
*
amoebaGpu
->
maxCovalentDegreeSz
;
int
particlesOffset
=
particleI
*
amoebaGpu
->
maxCovalentDegreeSz
;
unsigned
int
minCovalentIndex
=
static_cast
<
unsigned
int
>
(
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
]);
unsigned
int
minCovalentIndex
=
static_cast
<
unsigned
int
>
(
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
]);
unsigned
int
minCovalentPolarizationIndex
=
static_cast
<
unsigned
int
>
(
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
]);
unsigned
int
minCovalentPolarizationIndex
=
static_cast
<
unsigned
int
>
(
amoebaGpu
->
polarizationDegree
[
particlesOffset
]);
if
(
particleJ
<
minCovalentIndex
||
particleJ
>=
(
minCovalentIndex
+
amoebaGpu
->
maxCovalentDegreeSz
-
1
)
){
if
(
particleJ
<
minCovalentIndex
||
particleJ
>=
(
minCovalentIndex
+
amoebaGpu
->
maxCovalentDegreeSz
-
1
)
){
*
covalentDegree
=
0
;
*
covalentDegree
=
0
;
}
else
{
}
else
{
*
covalentDegree
=
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
+
(
particleJ
-
minCovalentIndex
)
+
1
];
*
covalentDegree
=
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
+
(
particleJ
-
minCovalentIndex
)
+
1
];
}
}
if
(
particleJ
<
minCovalentPolarizationIndex
||
particleJ
>=
(
minCovalentPolarizationIndex
+
amoebaGpu
->
maxCovalentDegreeSz
-
1
)
){
if
(
particleJ
<
minCovalentPolarizationIndex
||
particleJ
>=
(
minCovalentPolarizationIndex
+
amoebaGpu
->
maxCovalentDegreeSz
-
1
)
){
*
polarizationDegree
=
0
;
*
polarizationDegree
=
0
;
}
else
{
}
else
{
*
polarizationDegree
=
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
+
(
particleJ
-
minCovalentPolarizationIndex
)
+
1
];
*
polarizationDegree
=
amoebaGpu
->
polarizationDegree
[
particlesOffset
+
(
particleJ
-
minCovalentPolarizationIndex
)
+
1
];
}
}
/* if( *covalentDegree > 5 || *polarizationDegree > 5 ){
/* if( *covalentDegree > 5 || *polarizationDegree > 5 ){
...
@@ -2886,9 +2901,7 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
...
@@ -2886,9 +2901,7 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
if
(
amoebaGpu
->
psCovalentDegree
==
NULL
){
if
(
amoebaGpu
->
covalentDegree
.
size
()
<
1
)
return
;
return
;
}
const
unsigned
int
paddedAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
const
unsigned
int
paddedAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
const
unsigned
int
actualAtoms
=
amoebaGpu
->
gpuContext
->
natoms
;
const
unsigned
int
actualAtoms
=
amoebaGpu
->
gpuContext
->
natoms
;
...
@@ -2914,18 +2927,18 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
...
@@ -2914,18 +2927,18 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
{
{
int
x
=
atom1
/
grid
;
int
x
=
atom1
/
grid
;
int
particlesOffset
=
atom1
*
amoebaGpu
->
maxCovalentDegreeSz
;
int
particlesOffset
=
atom1
*
amoebaGpu
->
maxCovalentDegreeSz
;
int
minCovalentIndex
=
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
];
int
minCovalentIndex
=
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
];
int
minPolarCovIndex
=
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
];
int
minPolarCovIndex
=
amoebaGpu
->
polarizationDegree
[
particlesOffset
];
int
maxCIndex
=
0
;
int
maxCIndex
=
0
;
int
maxPIndex
=
0
;
int
maxPIndex
=
0
;
for
(
int
jj
=
amoebaGpu
->
maxCovalentDegreeSz
-
1
;
jj
>=
1
&&
(
maxPIndex
==
0
||
maxCIndex
==
0
);
jj
--
)
for
(
int
jj
=
amoebaGpu
->
maxCovalentDegreeSz
-
1
;
jj
>=
1
&&
(
maxPIndex
==
0
||
maxCIndex
==
0
);
jj
--
)
{
{
if
(
amoebaGpu
->
psC
ovalentDegree
->
_pSysData
[
particlesOffset
+
jj
]
&&
maxCellIndex
[
x
]
<
(
minCovalentIndex
+
jj
)
)
if
(
amoebaGpu
->
c
ovalentDegree
[
particlesOffset
+
jj
]
&&
maxCellIndex
[
x
]
<
(
minCovalentIndex
+
jj
)
)
{
{
maxCellIndex
[
x
]
=
minCovalentIndex
+
jj
;
maxCellIndex
[
x
]
=
minCovalentIndex
+
jj
;
maxCIndex
++
;
maxCIndex
++
;
}
}
if
(
amoebaGpu
->
p
sP
olarizationDegree
->
_pSysData
[
particlesOffset
+
jj
]
&&
maxCellIndex
[
x
]
<
(
minPolarCovIndex
+
jj
)
)
if
(
amoebaGpu
->
polarizationDegree
[
particlesOffset
+
jj
]
&&
maxCellIndex
[
x
]
<
(
minPolarCovIndex
+
jj
)
)
{
{
maxCellIndex
[
x
]
=
minPolarCovIndex
+
jj
;
maxCellIndex
[
x
]
=
minPolarCovIndex
+
jj
;
maxPIndex
++
;
maxPIndex
++
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaGpuTypes.h
View file @
6bad9d44
...
@@ -30,20 +30,10 @@
...
@@ -30,20 +30,10 @@
#include "kernels/gputypes.h"
#include "kernels/gputypes.h"
#include "amoebaCudaTypes.h"
#include "amoebaCudaTypes.h"
#define THREADS_PER_BLOCK 256
#include <map>
#include <map>
typedef
std
::
map
<
int
,
float
>
MapIntFloat
;
typedef
std
::
map
<
int
,
float
>
MapIntFloat
;
typedef
MapIntFloat
::
const_iterator
MapIntFloatCI
;
typedef
MapIntFloat
::
const_iterator
MapIntFloatCI
;
/*
* Remove
* pMapArray, dMapArray, paddedNumberOfAtoms, nonbondBlocks, nonbondThreadsPerBlock, nonbondOutputBuffers
* allocation of torqueMapForce psCovalentDegree psPolarizationDegree
*
THREADS_PER_BLOCK
*/
struct
_amoebaGpuContext
{
struct
_amoebaGpuContext
{
_gpuContext
*
gpuContext
;
_gpuContext
*
gpuContext
;
...
@@ -112,7 +102,6 @@ struct _amoebaGpuContext {
...
@@ -112,7 +102,6 @@ struct _amoebaGpuContext {
// multipole parameters
// multipole parameters
CUDAStream
<
int4
>*
psMultipoleParticlesIdsAndAxisType
;
CUDAStream
<
int4
>*
psMultipoleParticlesIdsAndAxisType
;
CUDAStream
<
int
>*
psMultipoleAxisOffset
;
// buffer indices used for mapping torques onto forces
// buffer indices used for mapping torques onto forces
...
@@ -133,10 +122,10 @@ struct _amoebaGpuContext {
...
@@ -133,10 +122,10 @@ struct _amoebaGpuContext {
CUDAStream
<
float2
>*
psDampingFactorAndThole
;
CUDAStream
<
float2
>*
psDampingFactorAndThole
;
//
slated for removal -- no longer used
//
used to setup scaling constants
CUDAStream
<
int
>
*
psC
ovalentDegree
;
std
::
vector
<
int
>
c
ovalentDegree
;
CUDAStream
<
int
>
*
p
sP
olarizationDegree
;
std
::
vector
<
int
>
polarizationDegree
;
// fixed-E field
// fixed-E field
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
6bad9d44
...
@@ -255,18 +255,22 @@ void kInitializeMutualInducedAndGkField_kernel(
...
@@ -255,18 +255,22 @@ void kInitializeMutualInducedAndGkField_kernel(
float
*
inducedDipolePolarS
)
float
*
inducedDipolePolarS
)
{
{
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
int
pos
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
cSim
.
atoms
)
return
;
while
(
pos
<
3
*
cSim
.
atoms
)
{
fixedEField
[
threadId
]
*=
polarizability
[
threadId
];
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
];
fixedEFieldPolar
[
threadId
]
*=
polarizability
[
threadId
];
inducedDipolePolar
[
threadId
]
=
fixedEFieldPolar
[
threadId
];
fixedGkField
[
threadId
]
*=
polarizability
[
threadId
];
fixedEField
[
pos
]
*=
polarizability
[
pos
];
inducedDipoleS
[
threadId
]
=
fixedEField
[
threadId
]
+
fixedGkField
[
threadId
];
inducedDipole
[
pos
]
=
fixedEField
[
pos
];
inducedDipolePolarS
[
threadId
]
=
fixedEFieldPolar
[
threadId
]
+
fixedGkField
[
threadId
];
fixedEFieldPolar
[
pos
]
*=
polarizability
[
pos
];
inducedDipolePolar
[
pos
]
=
fixedEFieldPolar
[
pos
];
fixedGkField
[
pos
]
*=
polarizability
[
pos
];
inducedDipoleS
[
pos
]
=
fixedEField
[
pos
]
+
fixedGkField
[
pos
];
inducedDipolePolarS
[
pos
]
=
fixedEFieldPolar
[
pos
]
+
fixedGkField
[
pos
];
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
...
@@ -355,21 +359,24 @@ void kSorUpdateMutualInducedAndGkField_kernel(
...
@@ -355,21 +359,24 @@ void kSorUpdateMutualInducedAndGkField_kernel(
{
{
float
polarSOR
=
0.70
f
;
float
polarSOR
=
0.70
f
;
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
int
pos
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
cSim
.
atoms
)
return
;
while
(
pos
<
3
*
cSim
.
atoms
)
{
float
previousDipole
=
inducedDipole
[
threadId
];
float
previousDipoleP
=
inducedDipoleP
[
threadId
];
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProduct
[
threadId
];
inducedDipoleP
[
threadId
]
=
fixedEFieldP
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProductP
[
threadId
];
inducedDipole
[
threadId
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
inducedDipoleP
[
threadId
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
matrixProduct
[
threadId
]
=
(
inducedDipole
[
threadId
]
-
previousDipole
)
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
float
previousDipole
=
inducedDipole
[
pos
];
matrixProductP
[
threadId
]
=
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
float
previousDipoleP
=
inducedDipoleP
[
pos
];
inducedDipole
[
pos
]
=
fixedEField
[
pos
]
+
polarizability
[
pos
]
*
matrixProduct
[
pos
];
inducedDipoleP
[
pos
]
=
fixedEFieldP
[
pos
]
+
polarizability
[
pos
]
*
matrixProductP
[
pos
];
inducedDipole
[
pos
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
pos
]
-
previousDipole
);
inducedDipoleP
[
pos
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
matrixProduct
[
pos
]
=
(
inducedDipole
[
pos
]
-
previousDipole
)
*
(
inducedDipole
[
pos
]
-
previousDipole
);
matrixProductP
[
pos
]
=
(
inducedDipoleP
[
pos
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
__global__
__global__
...
@@ -389,21 +396,23 @@ void kSorUpdateMutualInducedAndGkFieldS_kernel(
...
@@ -389,21 +396,23 @@ void kSorUpdateMutualInducedAndGkFieldS_kernel(
{
{
float
polarSOR
=
0.70
f
;
float
polarSOR
=
0.70
f
;
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
int
pos
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
cSim
.
atoms
)
return
;
while
(
pos
<
3
*
cSim
.
atoms
)
{
float
previousDipole
=
inducedDipole
[
threadId
];
float
previousDipole
=
inducedDipole
[
pos
];
float
previousDipoleP
=
inducedDipoleP
[
threadId
];
float
previousDipoleP
=
inducedDipoleP
[
pos
];
inducedDipole
[
threadId
]
=
fixedGkField
[
threadId
]
+
fixedEField
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProduct
[
threadId
];
inducedDipole
[
pos
]
=
fixedGkField
[
pos
]
+
fixedEField
[
pos
]
+
polarizability
[
pos
]
*
matrixProduct
[
pos
];
inducedDipoleP
[
threadId
]
=
fixedGkField
[
threadId
]
+
fixedEFieldP
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProductP
[
threadId
];
inducedDipoleP
[
pos
]
=
fixedGkField
[
pos
]
+
fixedEFieldP
[
pos
]
+
polarizability
[
pos
]
*
matrixProductP
[
pos
];
inducedDipole
[
threadId
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
inducedDipole
[
pos
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
pos
]
-
previousDipole
);
inducedDipoleP
[
threadId
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
inducedDipoleP
[
pos
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
matrixProduct
[
threadId
]
=
(
inducedDipole
[
threadId
]
-
previousDipole
)
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
matrixProduct
[
pos
]
=
(
inducedDipole
[
pos
]
-
previousDipole
)
*
(
inducedDipole
[
pos
]
-
previousDipole
);
matrixProductP
[
threadId
]
=
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
matrixProductP
[
pos
]
=
(
inducedDipoleP
[
pos
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
// reduce psWorkArray_3_1 -> outputArray
// reduce psWorkArray_3_1 -> outputArray
...
@@ -437,46 +446,6 @@ static void kReduceMutualInducedAndGkFields(amoebaGpuContext amoebaGpu,
...
@@ -437,46 +446,6 @@ static void kReduceMutualInducedAndGkFields(amoebaGpuContext amoebaGpu,
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields4"
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFields4"
);
}
}
#ifdef AMOEBA_DEBUG
#if 0
/**
 * Debug helper: print every (x,y,z) triple belonging to one output buffer of
 * psWorkArray_3_1 and psWorkArray_3_2 to the context log.
 *
 * Values are read from the streams' _pSysData arrays.
 * NOTE(review): presumably these are the host-side copies and the caller must
 * Download() both work arrays first -- confirm against CUDAStream.
 *
 * @param amoebaGpu    AMOEBA GPU context (supplies the log file, the work arrays
 *                     and the underlying gpuContext)
 * @param bufferIndex  index of the output buffer to print; each buffer spans
 *                     3*paddedNumberOfAtoms floats
 */
static void printMiFieldBuffer( amoebaGpuContext amoebaGpu, unsigned int bufferIndex )
{
    // nothing to print to; other logging code in this file guards on the log as well
    if( amoebaGpu->log == NULL ){
        return;
    }

    // the original body referenced 'gpu' without declaring it
    gpuContext gpu                 = amoebaGpu->gpuContext;
    const unsigned int paddedAtoms = gpu->sim.paddedNumberOfAtoms;

    (void) fprintf( amoebaGpu->log, "MI Field Buffer %u\n", bufferIndex );

    const unsigned int start       = bufferIndex*3*paddedAtoms;
    const unsigned int stop        = (bufferIndex+1)*3*paddedAtoms;
    for( unsigned int ii = start; ii < stop; ii += 3 ){

        // recover (buffer, particle) from the flat float index;
        // a distinct name avoids shadowing the bufferIndex parameter
        const unsigned int ii3Index      = ii/3;
        const unsigned int ownerBuffer   = ii3Index/paddedAtoms;
        const unsigned int particleIndex = ii3Index - ownerBuffer*paddedAtoms;

        (void) fprintf( amoebaGpu->log, " %6u %3u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
                        ii3Index, ownerBuffer, particleIndex,
                        amoebaGpu->psWorkArray_3_1->_pSysData[ii],
                        amoebaGpu->psWorkArray_3_1->_pSysData[ii+1],
                        amoebaGpu->psWorkArray_3_1->_pSysData[ii+2],
                        amoebaGpu->psWorkArray_3_2->_pSysData[ii],
                        amoebaGpu->psWorkArray_3_2->_pSysData[ii+1],
                        amoebaGpu->psWorkArray_3_2->_pSysData[ii+2] );
    }
}
/**
 * Debug helper: for a single atom, print its (x,y,z) entry in every output
 * buffer of psWorkArray_3_1 and psWorkArray_3_2 to the context log.
 *
 * Values are read from the streams' _pSysData arrays.
 * NOTE(review): presumably these are the host-side copies and the caller must
 * Download() both work arrays first -- confirm against CUDAStream.
 *
 * @param amoebaGpu   AMOEBA GPU context (supplies the log file, the work arrays
 *                    and the underlying gpuContext)
 * @param targetAtom  index of the atom whose per-buffer contributions are printed
 */
static void printMiFieldAtomBuffers( amoebaGpuContext amoebaGpu, unsigned int targetAtom )
{
    // nothing to print to; other logging code in this file guards on the log as well
    if( amoebaGpu->log == NULL ){
        return;
    }

    // the original body referenced 'gpu' without declaring it
    gpuContext gpu = amoebaGpu->gpuContext;

    (void) fprintf( amoebaGpu->log, "MI Field atom %u\n", targetAtom );

    for( unsigned int ii = 0; ii < gpu->sim.outputBuffers; ii++ ){

        // flat float index of this atom's triple inside buffer ii
        const unsigned int particleIndex = 3*(targetAtom + ii*gpu->sim.paddedNumberOfAtoms);

        (void) fprintf( amoebaGpu->log, " %2u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
                        ii, particleIndex,
                        amoebaGpu->psWorkArray_3_1->_pSysData[particleIndex],
                        amoebaGpu->psWorkArray_3_1->_pSysData[particleIndex+1],
                        amoebaGpu->psWorkArray_3_1->_pSysData[particleIndex+2],
                        amoebaGpu->psWorkArray_3_2->_pSysData[particleIndex],
                        amoebaGpu->psWorkArray_3_2->_pSysData[particleIndex+1],
                        amoebaGpu->psWorkArray_3_2->_pSysData[particleIndex+2] );
    }
}
#endif
#endif
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Compute mutual induced field
Compute mutual induced field
...
@@ -576,14 +545,6 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
...
@@ -576,14 +545,6 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
amoebaGpu
->
psWorkArray_3_3
->
Download
();
amoebaGpu
->
psWorkArray_3_3
->
Download
();
amoebaGpu
->
psWorkArray_3_4
->
Download
();
amoebaGpu
->
psWorkArray_3_4
->
Download
();
//printMiFieldAtomBuffers( amoebaGpu, (targetAtom + 0) );
//printMiFieldAtomBuffers( amoebaGpu, (targetAtom + 1) );
//printMiFieldAtomBuffers( amoebaGpu, 100 );
//printMiFieldBuffer( amoebaGpu, 0 );
//printMiFieldBuffer( amoebaGpu, 1 );
//printMiFieldBuffer( amoebaGpu, 37 );
//printMiFieldBuffer( amoebaGpu, 38 );
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
if
(
amoebaGpu
->
log
&&
iteration
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished MI kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished MI kernel execution %d
\n
"
,
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
...
@@ -711,28 +672,13 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
...
@@ -711,28 +672,13 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
int
iteration
;
int
iteration
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
int
numOfElems
=
gpu
->
natoms
*
3
;
int
numThreads
=
min
(
THREADS_PER_BLOCK
,
numOfElems
);
int
numBlocks
=
numOfElems
/
numThreads
;
if
(
(
numOfElems
%
numThreads
)
!=
0
)
numBlocks
++
;
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
&&
timestep
==
1
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d numOfElems=%d numThreads=%d numBlocks=%d "
"maxIterations=%d targetEpsilon=%.3e
\n
"
,
methodName
,
gpu
->
natoms
,
numOfElems
,
numThreads
,
numBlocks
,
amoebaGpu
->
mutualInducedMaxIterations
,
amoebaGpu
->
mutualInducedTargetEpsilon
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
// set [ E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// set [ E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
kInitializeMutualInducedAndGkField_kernel
<<<
numBlocks
,
numThreads
>>>
(
kInitializeMutualInducedAndGkField_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psGk_Field
->
_pDevData
,
amoebaGpu
->
psGk_Field
->
_pDevData
,
...
@@ -812,14 +758,14 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
...
@@ -812,14 +758,14 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
// post matrix multiply
// post matrix multiply
kSorUpdateMutualInducedAndGkField_kernel
<<<
numBlocks
,
numThreads
>>>
(
kSorUpdateMutualInducedAndGkField_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
amoebaGpu
->
psPolarizability
->
_pDevData
,
amoebaGpu
->
psPolarizability
->
_pDevData
,
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
LAUNCHERROR
(
"cudaComputeAmoebaMutualInducedAndGkFieldSorUpdate1"
);
LAUNCHERROR
(
"cudaComputeAmoebaMutualInducedAndGkFieldSorUpdate1"
);
kSorUpdateMutualInducedAndGkFieldS_kernel
<<<
numBlocks
,
numThreads
>>>
(
kSorUpdateMutualInducedAndGkFieldS_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
amoebaGpu
->
psPolarizability
->
_pDevData
,
amoebaGpu
->
psPolarizability
->
_pDevData
,
amoebaGpu
->
psInducedDipoleS
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolarS
->
_pDevData
,
amoebaGpu
->
psInducedDipoleS
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolarS
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
6bad9d44
...
@@ -120,14 +120,18 @@ void kInitializeMutualInducedField_kernel(
...
@@ -120,14 +120,18 @@ void kInitializeMutualInducedField_kernel(
float
*
inducedDipolePolar
)
float
*
inducedDipolePolar
)
{
{
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
int
pos
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
numberOfAtoms
)
return
;
while
(
pos
<
3
*
cSim
.
atoms
)
{
fixedEField
[
threadId
]
*=
polarizability
[
threadId
];
fixedEField
[
pos
]
*=
polarizability
[
pos
];
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
];
inducedDipole
[
pos
]
=
fixedEField
[
pos
];
fixedEFieldPolar
[
threadId
]
*=
polarizability
[
threadId
];
fixedEFieldPolar
[
pos
]
*=
polarizability
[
pos
];
inducedDipolePolar
[
threadId
]
=
fixedEFieldPolar
[
threadId
];
inducedDipolePolar
[
pos
]
=
fixedEFieldPolar
[
pos
];
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
...
@@ -195,20 +199,24 @@ void kSorUpdateMutualInducedField_kernel(
...
@@ -195,20 +199,24 @@ void kSorUpdateMutualInducedField_kernel(
{
{
float
polarSOR
=
0.70
f
;
float
polarSOR
=
0.70
f
;
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
int
pos
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
numberOfEntries
)
return
;
while
(
pos
<
3
*
cSim
.
atoms
)
{
float
previousDipole
=
inducedDipole
[
threadId
];
float
previousDipoleP
=
inducedDipoleP
[
threadId
];
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProduct
[
threadId
];
inducedDipoleP
[
threadId
]
=
fixedEFieldP
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProductP
[
threadId
];
inducedDipole
[
threadId
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
float
previousDipole
=
inducedDipole
[
pos
];
inducedDipoleP
[
threadId
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
float
previousDipoleP
=
inducedDipoleP
[
pos
];
matrixProduct
[
threadId
]
=
(
inducedDipole
[
threadId
]
-
previousDipole
)
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
inducedDipole
[
pos
]
=
fixedEField
[
pos
]
+
polarizability
[
pos
]
*
matrixProduct
[
pos
];
matrixProductP
[
threadId
]
=
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
inducedDipoleP
[
pos
]
=
fixedEFieldP
[
pos
]
+
polarizability
[
pos
]
*
matrixProductP
[
pos
];
inducedDipole
[
pos
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
pos
]
-
previousDipole
);
inducedDipoleP
[
pos
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
matrixProduct
[
pos
]
=
(
inducedDipole
[
pos
]
-
previousDipole
)
*
(
inducedDipole
[
pos
]
-
previousDipole
);
matrixProductP
[
pos
]
=
(
inducedDipoleP
[
pos
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
...
@@ -469,29 +477,14 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
...
@@ -469,29 +477,14 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
int
done
;
int
done
;
int
iteration
;
int
iteration
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
int
numOfElems
=
gpu
->
natoms
*
3
;
int
numThreads
=
min
(
THREADS_PER_BLOCK
,
numOfElems
);
int
numBlocks
=
numOfElems
/
numThreads
;
if
(
(
numOfElems
%
numThreads
)
!=
0
)
numBlocks
++
;
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d numOfElems=%d numThreads=%d numBlocks=%d "
"maxIterations=%d targetEpsilon=%.3e
\n
"
,
methodName
,
gpu
->
natoms
,
numOfElems
,
numThreads
,
numBlocks
,
amoebaGpu
->
mutualInducedMaxIterations
,
amoebaGpu
->
mutualInducedTargetEpsilon
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
// set [ E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// set [ E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
kInitializeMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
kInitializeMutualInducedField_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
natoms
,
gpu
->
natoms
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
...
@@ -555,7 +548,7 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
...
@@ -555,7 +548,7 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
// post matrix multiply
// post matrix multiply
kSorUpdateMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
kSorUpdateMutualInducedField_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevData
,
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevData
,
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
6bad9d44
...
@@ -242,14 +242,16 @@ static void kInitializeMutualInducedField_kernel(
...
@@ -242,14 +242,16 @@ static void kInitializeMutualInducedField_kernel(
float
*
inducedDipolePolar
)
float
*
inducedDipolePolar
)
{
{
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
int
pos
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
numberOfAtoms
)
return
;
while
(
pos
<
3
*
cSim
.
atoms
)
{
fixedEField
[
threadId
]
*=
polarizability
[
threadId
];
fixedEField
[
pos
]
*=
polarizability
[
pos
];
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
];
inducedDipole
[
pos
]
=
fixedEField
[
pos
];
fixedEFieldPolar
[
threadId
]
*=
polarizability
[
threadId
];
fixedEFieldPolar
[
pos
]
*=
polarizability
[
pos
];
inducedDipolePolar
[
threadId
]
=
fixedEFieldPolar
[
threadId
];
inducedDipolePolar
[
pos
]
=
fixedEFieldPolar
[
pos
];
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
...
@@ -325,27 +327,31 @@ static void kSorUpdateMutualInducedField_kernel(
...
@@ -325,27 +327,31 @@ static void kSorUpdateMutualInducedField_kernel(
float
*
matrixProduct
,
float
*
matrixProductP
)
float
*
matrixProduct
,
float
*
matrixProductP
)
{
{
int
threadId
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
int
pos
=
__mul24
(
blockIdx
.
x
,
blockDim
.
x
)
+
threadIdx
.
x
;
if
(
threadId
>=
3
*
numberOfEntries
)
return
;
while
(
pos
<
3
*
cSim
.
atoms
)
{
float
previousDipole
=
inducedDipole
[
threadId
];
float
previousDipoleP
=
inducedDipoleP
[
threadId
];
// add self terms to fields
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
matrixProduct
[
threadId
]
+=
term
*
previousDipole
;
matrixProductP
[
threadId
]
+=
term
*
previousDipoleP
;
inducedDipole
[
threadId
]
=
fixedEField
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProduct
[
threadId
];
inducedDipoleP
[
threadId
]
=
fixedEFieldP
[
threadId
]
+
polarizability
[
threadId
]
*
matrixProductP
[
threadId
];
const
float
polarSOR
=
0.70
f
;
float
previousDipole
=
inducedDipole
[
pos
];
inducedDipole
[
threadId
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
threadId
]
-
previousDipole
);
float
previousDipoleP
=
inducedDipoleP
[
pos
];
inducedDipoleP
[
threadId
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
// add self terms to fields
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
matrixProduct
[
pos
]
+=
term
*
previousDipole
;
matrixProductP
[
pos
]
+=
term
*
previousDipoleP
;
inducedDipole
[
pos
]
=
fixedEField
[
pos
]
+
polarizability
[
pos
]
*
matrixProduct
[
pos
];
inducedDipoleP
[
pos
]
=
fixedEFieldP
[
pos
]
+
polarizability
[
pos
]
*
matrixProductP
[
pos
];
const
float
polarSOR
=
0.70
f
;
inducedDipole
[
pos
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
pos
]
-
previousDipole
);
inducedDipoleP
[
pos
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
matrixProduct
[
pos
]
=
(
inducedDipole
[
pos
]
-
previousDipole
)
*
(
inducedDipole
[
pos
]
-
previousDipole
);
matrixProductP
[
pos
]
=
(
inducedDipoleP
[
pos
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
matrixProduct
[
threadId
]
=
(
inducedDipole
[
threadId
]
-
previousDipole
)
*
(
inducedDipole
[
threadId
]
-
previousDipole
)
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
matrixProductP
[
threadId
]
=
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
)
*
(
inducedDipoleP
[
threadId
]
-
previousDipoleP
);
}
}
}
...
@@ -539,28 +545,13 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -539,28 +545,13 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
int
iteration
;
int
iteration
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
int
numOfElems
=
gpu
->
natoms
*
3
;
int
numThreads
=
min
(
THREADS_PER_BLOCK
,
numOfElems
);
int
numBlocks
=
numOfElems
/
numThreads
;
if
(
(
numOfElems
%
numThreads
)
!=
0
)
numBlocks
++
;
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d numOfElems=%d numThreads=%d numBlocks=%d "
"maxIterations=%d targetEpsilon=%.3e
\n
"
,
methodName
,
gpu
->
natoms
,
numOfElems
,
numThreads
,
numBlocks
,
amoebaGpu
->
mutualInducedMaxIterations
,
amoebaGpu
->
mutualInducedTargetEpsilon
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
#endif
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
// set [ E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// set [ E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
kInitializeMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
kInitializeMutualInducedField_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
natoms
,
gpu
->
natoms
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
...
@@ -607,7 +598,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -607,7 +598,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// post matrix multiply
// post matrix multiply
kSorUpdateMutualInducedField_kernel
<<<
numBlocks
,
numThreads
>>>
(
kSorUpdateMutualInducedField_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
(
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevData
,
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevData
,
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaReduce.cu
deleted
100644 → 0
View file @
1beac75d
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
#include "amoebaGpuTypes.h"
#include "amoebaCudaKernels.h"
#include <stdio.h>
#undef AMOEBA_OFFSET_3
#undef AMOEBA_INCLUDE_DIAGONAL
#define METHOD_NAME(a, b) a##ExcludeDiagonalOffset1##b
#include "kCalculateAmoebaCudaReduce.h"
#undef METHOD_NAME
#define AMOEBA_OFFSET_3
#define METHOD_NAME(a, b) a##ExcludeDiagonalOffset3##b
#include "kCalculateAmoebaCudaReduce.h"
#undef METHOD_NAME
#undef AMOEBA_OFFSET_3
#define AMOEBA_INCLUDE_DIAGONAL
#define METHOD_NAME(a, b) a##IncludeDiagonalOffset1##b
#include "kCalculateAmoebaCudaReduce.h"
#undef METHOD_NAME
#define AMOEBA_OFFSET_3
#define METHOD_NAME(a, b) a##IncludeDiagonalOffset3##b
#include "kCalculateAmoebaCudaReduce.h"
#undef METHOD_NAME
#undef AMOEBA_OFFSET_3
#undef AMOEBA_INCLUDE_DIAGONAL
/**
 * Reduce a row-major Nsz x Nsz device array to a length-Nsz device array by
 * summing over the second (column) index, in two kernel stages:
 *
 *   1. block level: each block sums up to blockDim.x entries of one row into
 *      a temporary partial-sum array (numBlocksPerAtom partial sums per row);
 *   2. final:       one thread per row adds that row's partial sums.
 *
 * @param N2Array          device input, Nsz*Nsz entries of 'offset' floats each
 * @param Nsz              number of rows/columns; nothing is done if <= 0
 * @param NArray           device output, Nsz entries of 'offset' floats each
 * @param includeDiagonal  nonzero: include the i == j entry in each row sum
 * @param offset           floats per entry; only 1 and 3 are supported
 */
void cudaReduceN2ToN( float *N2Array, int Nsz, float *NArray, int includeDiagonal, int offset )
{
    // Nsz == 0 would make numThreads 0 and the block count a division by zero
    if( Nsz <= 0 ){
        return;
    }

    // only offset 1 and 3 kernels are instantiated; previously any other value
    // allocated/freed scratch memory and silently left NArray untouched
    if( offset != 1 && offset != 3 ){
        (void) fprintf( stderr, "cudaReduceN2ToN: unsupported offset=%d (expected 1 or 3)\n", offset );
        return;
    }

    const int numThreads       = min( THREADS_PER_BLOCK, Nsz );
    const int numBlocksPerAtom = ( Nsz + numThreads - 1 )/numThreads;   // ceil( Nsz/numThreads )
    const int numBlocks        = numBlocksPerAtom*Nsz;

    // allocate GPU memory for the per-block partial sums

    float* partialSum1_d       = NULL;
    const cudaError_t status   = cudaMalloc( (void**) &partialSum1_d,
                                             static_cast<size_t>(numBlocks)*offset*sizeof( float ) );
    if( status != cudaSuccess ){
        (void) fprintf( stderr, "cudaReduceN2ToN: cudaMalloc failed: %s\n", cudaGetErrorString( status ) );
        return;
    }

    // stage 1: block-level partial sums

    if( includeDiagonal ){
        if( offset == 3 ){
            kCalculateAmoebaReduceIncludeDiagonalOffset3N2ToNBlockLevel<<< numBlocks, numThreads >>>( N2Array, partialSum1_d, Nsz, numBlocksPerAtom );
            LAUNCHERROR( "kCalculateAmoebaReduceN2ToNBlockLevel1" );
        } else {
            kCalculateAmoebaReduceIncludeDiagonalOffset1N2ToNBlockLevel<<< numBlocks, numThreads >>>( N2Array, partialSum1_d, Nsz, numBlocksPerAtom );
            LAUNCHERROR( "kCalculateAmoebaReduceN2ToNBlockLevel2" );
        }
    } else {
        if( offset == 3 ){
            kCalculateAmoebaReduceExcludeDiagonalOffset3N2ToNBlockLevel<<< numBlocks, numThreads >>>( N2Array, partialSum1_d, Nsz, numBlocksPerAtom );
            LAUNCHERROR( "kCalculateAmoebaReduceN2ToNBlockLevel3" );
        } else {
            kCalculateAmoebaReduceExcludeDiagonalOffset1N2ToNBlockLevel<<< numBlocks, numThreads >>>( N2Array, partialSum1_d, Nsz, numBlocksPerAtom );
            LAUNCHERROR( "kCalculateAmoebaReduceN2ToNBlockLevel4" );
        }
    }

    // stage 2: one thread per row sums that row's partial sums.
    // Only Nsz threads do any work, so ceil( Nsz/numThreads ) blocks suffice;
    // the previous grid of ~numBlocks*Nsz/numThreads blocks launched only idle
    // extra threads and could overflow int / exceed the grid-size limit for large Nsz.
    // The IncludeDiagonal instantiation is used on both paths, as before.

    const int finalBlocks      = numBlocksPerAtom;

    if( offset == 3 ){
        kCalculateAmoebaReduceIncludeDiagonalOffset3N2ToNFinal<<< finalBlocks, numThreads >>>( partialSum1_d, NArray, Nsz, numBlocksPerAtom );
        LAUNCHERROR( "kCalculateAmoebaReduceN2ToNFinal3" );
    } else {
        kCalculateAmoebaReduceIncludeDiagonalOffset1N2ToNFinal<<< finalBlocks, numThreads >>>( partialSum1_d, NArray, Nsz, numBlocksPerAtom );
        LAUNCHERROR( "kCalculateAmoebaReduceN2ToNFinal1" );
    }

    // free memory

    cudaFree( partialSum1_d );
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaReduce.h
deleted
100644 → 0
View file @
1beac75d
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
typedef
unsigned
int
uint
;
/**
 * Block-level stage of the N x N -> N reduction (template body; the including
 * file defines METHOD_NAME and optionally AMOEBA_OFFSET_3 / AMOEBA_INCLUDE_DIAGONAL).
 *
 * Block b handles row atomI = b / numberOfBlocksPerAtom and the column chunk
 * starting at (b % numberOfBlocksPerAtom)*blockDim.x; each thread loads one
 * entry (or 0 if out of range / excluded diagonal) into shared memory, the
 * block sums them, and thread 0 writes the block's sum to partialSum[b].
 *
 * @param N2Array                row-major num x num input, 1 or 3 floats per entry
 * @param partialSum             output, one entry (1 or 3 floats) per block
 * @param num                    number of rows/columns
 * @param numberOfBlocksPerAtom  number of blocks assigned to each row
 *
 * The shared arrays hold THREADS_PER_BLOCK entries, so blockDim.x must not
 * exceed THREADS_PER_BLOCK.
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaReduce, N2ToNBlockLevel)( float *N2Array, float *partialSum, int num, int numberOfBlocksPerAtom )
{
    uint tid = threadIdx.x;

    // per-thread accumulators; zero so that threads without a valid entry contribute nothing
    __shared__ float asx[THREADS_PER_BLOCK];
    asx[tid] = 0.0f;
#ifdef AMOEBA_OFFSET_3
    __shared__ float asy[THREADS_PER_BLOCK];
    __shared__ float asz[THREADS_PER_BLOCK];
    asy[tid] = asz[tid] = 0.0f;
    int offset = 3;
#else
    int offset = 1;
#endif

    int atomI = blockIdx.x/numberOfBlocksPerAtom;
    int atomJ = (blockIdx.x % numberOfBlocksPerAtom)*blockDim.x + tid;

#ifdef AMOEBA_INCLUDE_DIAGONAL
    if( atomJ < num && atomI < num ){
#else
    // row bound checked here too, for consistency with the include-diagonal variant
    if( atomJ < num && atomI < num && atomJ != atomI ){
#endif
        int index = offset*(atomI*num + atomJ);
        asx[tid]  = N2Array[index];
#ifdef AMOEBA_OFFSET_3
        asy[tid]  = N2Array[index+1];
        asz[tid]  = N2Array[index+2];
#endif
    }
    __syncthreads();   // to make sure all the elements are loaded

    // Tree reduction valid for any blockDim.x. The launcher uses
    // min( THREADS_PER_BLOCK, Nsz ) threads, which need not be a power of two;
    // plain halving (s = blockDim.x/2; s >>= 1) silently drops the odd element
    // at each level in that case. For power-of-two sizes this performs exactly
    // the same additions in the same order as plain halving.
    for( uint active = blockDim.x; active > 1; ){
        const uint half = (active + 1) >> 1;
        if( tid + half < active ){
            asx[tid] += asx[tid + half];
#ifdef AMOEBA_OFFSET_3
            asy[tid] += asy[tid + half];
            asz[tid] += asz[tid + half];
#endif
        }
        __syncthreads();
        active = half;
    }

    if( tid == 0 ){
        partialSum[blockIdx.x*offset] = asx[0];
#ifdef AMOEBA_OFFSET_3
        partialSum[blockIdx.x*3+1]    = asy[0];
        partialSum[blockIdx.x*3+2]    = asz[0];
#endif
    }
}
/**
 * Final stage of the N x N -> N reduction (template body; the including file
 * defines METHOD_NAME and optionally AMOEBA_OFFSET_3).
 *
 * Thread t (global index) adds the numberOfBlocksPerAtom consecutive partial
 * sums belonging to row t and stores the result in final[t].
 *
 * @param partialSum             per-block partial sums, numberOfBlocksPerAtom
 *                               consecutive entries (1 or 3 floats each) per row
 * @param final                  output, num entries of 1 or 3 floats each
 * @param num                    number of rows
 * @param numberOfBlocksPerAtom  number of partial sums per row
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaReduce, N2ToNFinal)( float *partialSum, float *final, int num, int numberOfBlocksPerAtom )
{
    uint thread_id = __mul24( blockIdx.x, blockDim.x ) + threadIdx.x;

    // valid rows are 0..num-1; the previous test (thread_id > num) let thread
    // 'num' read and write one entry past the end of both arrays
    if( thread_id >= (uint) num ){
        return;
    }

    float3 sum;
    sum.x = sum.y = sum.z = 0.0f;
#ifdef AMOEBA_OFFSET_3
    int offset = 3;
#else
    int offset = 1;
#endif

    // first partial sum of this thread's row
    int index = thread_id*offset*numberOfBlocksPerAtom;
    for( int i = 0; i < numberOfBlocksPerAtom; i++ ){
        sum.x += partialSum[index + i*offset];
#ifdef AMOEBA_OFFSET_3
        sum.y += partialSum[index + i*offset + 1];
        sum.z += partialSum[index + i*offset + 2];
#endif
    }

    final[thread_id*offset] = sum.x;
#ifdef AMOEBA_OFFSET_3
    final[thread_id*3+1]    = sum.y;
    final[thread_id*3+2]    = sum.z;
#endif
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment