Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f15f591b
Commit
f15f591b
authored
Feb 14, 2018
by
Peter Eastman
Browse files
Continuing to convert CudaArrays.
parent
b8c86406
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
430 additions
and
598 deletions
+430
-598
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+86
-98
platforms/cuda/src/CudaBondedUtilities.cpp
platforms/cuda/src/CudaBondedUtilities.cpp
+5
-4
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+339
-496
No files found.
platforms/cuda/include/CudaKernels.h
View file @
f15f591b
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -265,9 +265,8 @@ private:
...
@@ -265,9 +265,8 @@ private:
class
CudaCalcHarmonicBondForceKernel
:
public
CalcHarmonicBondForceKernel
{
class
CudaCalcHarmonicBondForceKernel
:
public
CalcHarmonicBondForceKernel
{
public:
public:
CudaCalcHarmonicBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcHarmonicBondForceKernel
(
name
,
platform
),
CudaCalcHarmonicBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcHarmonicBondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
}
~
CudaCalcHarmonicBondForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -298,7 +297,7 @@ private:
...
@@ -298,7 +297,7 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaArray
*
params
;
CudaArray
params
;
};
};
/**
/**
...
@@ -307,7 +306,7 @@ private:
...
@@ -307,7 +306,7 @@ private:
class
CudaCalcCustomBondForceKernel
:
public
CalcCustomBondForceKernel
{
class
CudaCalcCustomBondForceKernel
:
public
CalcCustomBondForceKernel
{
public:
public:
CudaCalcCustomBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomBondForceKernel
(
name
,
platform
),
CudaCalcCustomBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomBondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
{
}
}
~
CudaCalcCustomBondForceKernel
();
~
CudaCalcCustomBondForceKernel
();
/**
/**
...
@@ -341,7 +340,7 @@ private:
...
@@ -341,7 +340,7 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
};
};
...
@@ -352,9 +351,8 @@ private:
...
@@ -352,9 +351,8 @@ private:
class
CudaCalcHarmonicAngleForceKernel
:
public
CalcHarmonicAngleForceKernel
{
class
CudaCalcHarmonicAngleForceKernel
:
public
CalcHarmonicAngleForceKernel
{
public:
public:
CudaCalcHarmonicAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcHarmonicAngleForceKernel
(
name
,
platform
),
CudaCalcHarmonicAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcHarmonicAngleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
}
~
CudaCalcHarmonicAngleForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -385,7 +383,7 @@ private:
...
@@ -385,7 +383,7 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaArray
*
params
;
CudaArray
params
;
};
};
/**
/**
...
@@ -394,7 +392,7 @@ private:
...
@@ -394,7 +392,7 @@ private:
class
CudaCalcCustomAngleForceKernel
:
public
CalcCustomAngleForceKernel
{
class
CudaCalcCustomAngleForceKernel
:
public
CalcCustomAngleForceKernel
{
public:
public:
CudaCalcCustomAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomAngleForceKernel
(
name
,
platform
),
CudaCalcCustomAngleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomAngleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
{
}
}
~
CudaCalcCustomAngleForceKernel
();
~
CudaCalcCustomAngleForceKernel
();
/**
/**
...
@@ -428,7 +426,7 @@ private:
...
@@ -428,7 +426,7 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
};
};
...
@@ -439,9 +437,8 @@ private:
...
@@ -439,9 +437,8 @@ private:
class
CudaCalcPeriodicTorsionForceKernel
:
public
CalcPeriodicTorsionForceKernel
{
class
CudaCalcPeriodicTorsionForceKernel
:
public
CalcPeriodicTorsionForceKernel
{
public:
public:
CudaCalcPeriodicTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcPeriodicTorsionForceKernel
(
name
,
platform
),
CudaCalcPeriodicTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcPeriodicTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
params
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
}
~
CudaCalcPeriodicTorsionForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -472,7 +469,7 @@ private:
...
@@ -472,7 +469,7 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaArray
*
params
;
CudaArray
params
;
};
};
/**
/**
...
@@ -481,9 +478,8 @@ private:
...
@@ -481,9 +478,8 @@ private:
class
CudaCalcRBTorsionForceKernel
:
public
CalcRBTorsionForceKernel
{
class
CudaCalcRBTorsionForceKernel
:
public
CalcRBTorsionForceKernel
{
public:
public:
CudaCalcRBTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcRBTorsionForceKernel
(
name
,
platform
),
CudaCalcRBTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcRBTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
params1
(
NULL
),
params2
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
}
~
CudaCalcRBTorsionForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -514,8 +510,8 @@ private:
...
@@ -514,8 +510,8 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaArray
*
params1
;
CudaArray
params1
;
CudaArray
*
params2
;
CudaArray
params2
;
};
};
/**
/**
...
@@ -524,9 +520,8 @@ private:
...
@@ -524,9 +520,8 @@ private:
class
CudaCalcCMAPTorsionForceKernel
:
public
CalcCMAPTorsionForceKernel
{
class
CudaCalcCMAPTorsionForceKernel
:
public
CalcCMAPTorsionForceKernel
{
public:
public:
CudaCalcCMAPTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCMAPTorsionForceKernel
(
name
,
platform
),
CudaCalcCMAPTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCMAPTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
,
coefficients
(
NULL
),
mapPositions
(
NULL
),
torsionMaps
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
)
{
}
}
~
CudaCalcCMAPTorsionForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -558,9 +553,9 @@ private:
...
@@ -558,9 +553,9 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
std
::
vector
<
int2
>
mapPositionsVec
;
std
::
vector
<
int2
>
mapPositionsVec
;
CudaArray
*
coefficients
;
CudaArray
coefficients
;
CudaArray
*
mapPositions
;
CudaArray
mapPositions
;
CudaArray
*
torsionMaps
;
CudaArray
torsionMaps
;
};
};
/**
/**
...
@@ -569,7 +564,7 @@ private:
...
@@ -569,7 +564,7 @@ private:
class
CudaCalcCustomTorsionForceKernel
:
public
CalcCustomTorsionForceKernel
{
class
CudaCalcCustomTorsionForceKernel
:
public
CalcCustomTorsionForceKernel
{
public:
public:
CudaCalcCustomTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomTorsionForceKernel
(
name
,
platform
),
CudaCalcCustomTorsionForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomTorsionForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
{
}
}
~
CudaCalcCustomTorsionForceKernel
();
~
CudaCalcCustomTorsionForceKernel
();
/**
/**
...
@@ -603,7 +598,7 @@ private:
...
@@ -603,7 +598,7 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
};
};
...
@@ -614,9 +609,7 @@ private:
...
@@ -614,9 +609,7 @@ private:
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
class
CudaCalcNonbondedForceKernel
:
public
CalcNonbondedForceKernel
{
public:
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeDispersionBsplineModuliX
(
NULL
),
pmeDispersionBsplineModuliY
(
NULL
),
pmeDispersionBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
dispersionFft
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
}
}
~
CudaCalcNonbondedForceKernel
();
~
CudaCalcNonbondedForceKernel
();
/**
/**
...
@@ -682,20 +675,20 @@ private:
...
@@ -682,20 +675,20 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
ForceInfo
*
info
;
bool
hasInitializedFFT
;
bool
hasInitializedFFT
;
CudaArray
*
sigmaEpsilon
;
CudaArray
sigmaEpsilon
;
CudaArray
*
exceptionParams
;
CudaArray
exceptionParams
;
CudaArray
*
cosSinSums
;
CudaArray
cosSinSums
;
CudaArray
*
directPmeGrid
;
CudaArray
directPmeGrid
;
CudaArray
*
reciprocalPmeGrid
;
CudaArray
reciprocalPmeGrid
;
CudaArray
*
pmeBsplineModuliX
;
CudaArray
pmeBsplineModuliX
;
CudaArray
*
pmeBsplineModuliY
;
CudaArray
pmeBsplineModuliY
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
pmeBsplineModuliZ
;
CudaArray
*
pmeDispersionBsplineModuliX
;
CudaArray
pmeDispersionBsplineModuliX
;
CudaArray
*
pmeDispersionBsplineModuliY
;
CudaArray
pmeDispersionBsplineModuliY
;
CudaArray
*
pmeDispersionBsplineModuliZ
;
CudaArray
pmeDispersionBsplineModuliZ
;
CudaArray
*
pmeAtomRange
;
CudaArray
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
pmeAtomGridIndex
;
CudaArray
*
pmeEnergyBuffer
;
CudaArray
pmeEnergyBuffer
;
CudaSort
*
sort
;
CudaSort
*
sort
;
Kernel
cpuPme
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
PmeIO
*
pmeio
;
...
@@ -737,7 +730,7 @@ private:
...
@@ -737,7 +730,7 @@ private:
class
CudaCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
class
CudaCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
public:
public:
CudaCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
CudaCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
interactionGroupData
(
NULL
),
forceCopy
(
NULL
),
system
(
system
),
hasInitializedKernel
(
false
)
{
cu
(
cu
),
params
(
NULL
),
forceCopy
(
NULL
),
system
(
system
),
hasInitializedKernel
(
false
)
{
}
}
~
CudaCalcCustomNonbondedForceKernel
();
~
CudaCalcCustomNonbondedForceKernel
();
/**
/**
...
@@ -769,13 +762,13 @@ private:
...
@@ -769,13 +762,13 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
CudaArray
*
interactionGroupData
;
CudaArray
interactionGroupData
;
CUfunction
interactionGroupKernel
;
CUfunction
interactionGroupKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
double
longRangeCoefficient
;
double
longRangeCoefficient
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
;
...
@@ -790,9 +783,8 @@ private:
...
@@ -790,9 +783,8 @@ private:
class
CudaCalcGBSAOBCForceKernel
:
public
CalcGBSAOBCForceKernel
{
class
CudaCalcGBSAOBCForceKernel
:
public
CalcGBSAOBCForceKernel
{
public:
public:
CudaCalcGBSAOBCForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
CalcGBSAOBCForceKernel
(
name
,
platform
),
cu
(
cu
),
CudaCalcGBSAOBCForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
CalcGBSAOBCForceKernel
(
name
,
platform
),
cu
(
cu
),
hasCreatedKernels
(
false
)
,
params
(
NULL
),
bornSum
(
NULL
),
bornRadii
(
NULL
),
bornForce
(
NULL
),
obcChain
(
NULL
)
{
hasCreatedKernels
(
false
)
{
}
}
~
CudaCalcGBSAOBCForceKernel
();
/**
/**
* Initialize the kernel.
* Initialize the kernel.
*
*
...
@@ -823,11 +815,11 @@ private:
...
@@ -823,11 +815,11 @@ private:
int
maxTiles
;
int
maxTiles
;
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
ForceInfo
*
info
;
CudaArray
*
params
;
CudaArray
params
;
CudaArray
*
bornSum
;
CudaArray
bornSum
;
CudaArray
*
bornRadii
;
CudaArray
bornRadii
;
CudaArray
*
bornForce
;
CudaArray
bornForce
;
CudaArray
*
obcChain
;
CudaArray
obcChain
;
CUfunction
computeBornSumKernel
;
CUfunction
computeBornSumKernel
;
CUfunction
reduceBornSumKernel
;
CUfunction
reduceBornSumKernel
;
CUfunction
force1Kernel
;
CUfunction
force1Kernel
;
...
@@ -841,8 +833,7 @@ private:
...
@@ -841,8 +833,7 @@ private:
class
CudaCalcCustomGBForceKernel
:
public
CalcCustomGBForceKernel
{
class
CudaCalcCustomGBForceKernel
:
public
CalcCustomGBForceKernel
{
public:
public:
CudaCalcCustomGBForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomGBForceKernel
(
name
,
platform
),
CudaCalcCustomGBForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomGBForceKernel
(
name
,
platform
),
hasInitializedKernels
(
false
),
cu
(
cu
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
energyDerivChain
(
NULL
),
longEnergyDerivs
(
NULL
),
globals
(
NULL
),
hasInitializedKernels
(
false
),
cu
(
cu
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
energyDerivChain
(
NULL
),
system
(
system
)
{
valueBuffers
(
NULL
),
system
(
system
)
{
}
}
~
CudaCalcCustomGBForceKernel
();
~
CudaCalcCustomGBForceKernel
();
/**
/**
...
@@ -880,13 +871,13 @@ private:
...
@@ -880,13 +871,13 @@ private:
CudaParameterSet
*
energyDerivs
;
CudaParameterSet
*
energyDerivs
;
CudaParameterSet
*
energyDerivChain
;
CudaParameterSet
*
energyDerivChain
;
std
::
vector
<
CudaParameterSet
*>
dValuedParam
;
std
::
vector
<
CudaParameterSet
*>
dValuedParam
;
std
::
vector
<
CudaArray
*
>
dValue0dParam
;
std
::
vector
<
CudaArray
>
dValue0dParam
;
CudaArray
*
longEnergyDerivs
;
CudaArray
longEnergyDerivs
;
CudaArray
*
globals
;
CudaArray
globals
;
CudaArray
*
valueBuffers
;
CudaArray
valueBuffers
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
std
::
vector
<
bool
>
pairValueUsesParam
,
pairEnergyUsesParam
,
pairEnergyUsesValue
;
std
::
vector
<
bool
>
pairValueUsesParam
,
pairEnergyUsesParam
,
pairEnergyUsesValue
;
const
System
&
system
;
const
System
&
system
;
CUfunction
pairValueKernel
,
perParticleValueKernel
,
pairEnergyKernel
,
perParticleEnergyKernel
,
gradientChainRuleKernel
;
CUfunction
pairValueKernel
,
perParticleValueKernel
,
pairEnergyKernel
,
perParticleEnergyKernel
,
gradientChainRuleKernel
;
...
@@ -901,7 +892,7 @@ private:
...
@@ -901,7 +892,7 @@ private:
class
CudaCalcCustomExternalForceKernel
:
public
CalcCustomExternalForceKernel
{
class
CudaCalcCustomExternalForceKernel
:
public
CalcCustomExternalForceKernel
{
public:
public:
CudaCalcCustomExternalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomExternalForceKernel
(
name
,
platform
),
CudaCalcCustomExternalForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomExternalForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
,
globals
(
NULL
)
{
hasInitializedKernel
(
false
),
cu
(
cu
),
system
(
system
),
params
(
NULL
)
{
}
}
~
CudaCalcCustomExternalForceKernel
();
~
CudaCalcCustomExternalForceKernel
();
/**
/**
...
@@ -935,7 +926,7 @@ private:
...
@@ -935,7 +926,7 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
const
System
&
system
;
const
System
&
system
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
};
};
...
@@ -946,8 +937,7 @@ private:
...
@@ -946,8 +937,7 @@ private:
class
CudaCalcCustomHbondForceKernel
:
public
CalcCustomHbondForceKernel
{
class
CudaCalcCustomHbondForceKernel
:
public
CalcCustomHbondForceKernel
{
public:
public:
CudaCalcCustomHbondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomHbondForceKernel
(
name
,
platform
),
CudaCalcCustomHbondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomHbondForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
donorParams
(
NULL
),
acceptorParams
(
NULL
),
donors
(
NULL
),
acceptors
(
NULL
),
hasInitializedKernel
(
false
),
cu
(
cu
),
donorParams
(
NULL
),
acceptorParams
(
NULL
),
system
(
system
)
{
globals
(
NULL
),
donorExclusions
(
NULL
),
acceptorExclusions
(
NULL
),
system
(
system
)
{
}
}
~
CudaCalcCustomHbondForceKernel
();
~
CudaCalcCustomHbondForceKernel
();
/**
/**
...
@@ -981,14 +971,14 @@ private:
...
@@ -981,14 +971,14 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
CudaParameterSet
*
donorParams
;
CudaParameterSet
*
donorParams
;
CudaParameterSet
*
acceptorParams
;
CudaParameterSet
*
acceptorParams
;
CudaArray
*
globals
;
CudaArray
globals
;
CudaArray
*
donors
;
CudaArray
donors
;
CudaArray
*
acceptors
;
CudaArray
acceptors
;
CudaArray
*
donorExclusions
;
CudaArray
donorExclusions
;
CudaArray
*
acceptorExclusions
;
CudaArray
acceptorExclusions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
std
::
vector
<
void
*>
donorArgs
,
acceptorArgs
;
std
::
vector
<
void
*>
donorArgs
,
acceptorArgs
;
const
System
&
system
;
const
System
&
system
;
CUfunction
donorKernel
,
acceptorKernel
;
CUfunction
donorKernel
,
acceptorKernel
;
...
@@ -1000,7 +990,7 @@ private:
...
@@ -1000,7 +990,7 @@ private:
class
CudaCalcCustomCentroidBondForceKernel
:
public
CalcCustomCentroidBondForceKernel
{
class
CudaCalcCustomCentroidBondForceKernel
:
public
CalcCustomCentroidBondForceKernel
{
public:
public:
CudaCalcCustomCentroidBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomCentroidBondForceKernel
(
name
,
platform
),
CudaCalcCustomCentroidBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomCentroidBondForceKernel
(
name
,
platform
),
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
groupParticles
(
NULL
),
groupWeights
(
NULL
),
groupOffsets
(
NULL
),
groupForces
(
NULL
),
bondGroups
(
NULL
),
centerPositions
(
NULL
),
system
(
system
)
{
cu
(
cu
),
params
(
NULL
),
system
(
system
)
{
}
}
~
CudaCalcCustomCentroidBondForceKernel
();
~
CudaCalcCustomCentroidBondForceKernel
();
/**
/**
...
@@ -1034,16 +1024,16 @@ private:
...
@@ -1034,16 +1024,16 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
CudaArray
*
groupParticles
;
CudaArray
groupParticles
;
CudaArray
*
groupWeights
;
CudaArray
groupWeights
;
CudaArray
*
groupOffsets
;
CudaArray
groupOffsets
;
CudaArray
*
groupForces
;
CudaArray
groupForces
;
CudaArray
*
bondGroups
;
CudaArray
bondGroups
;
CudaArray
*
centerPositions
;
CudaArray
centerPositions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
std
::
vector
<
void
*>
groupForcesArgs
;
std
::
vector
<
void
*>
groupForcesArgs
;
CUfunction
computeCentersKernel
,
groupForcesKernel
,
applyForcesKernel
;
CUfunction
computeCentersKernel
,
groupForcesKernel
,
applyForcesKernel
;
const
System
&
system
;
const
System
&
system
;
...
@@ -1055,7 +1045,7 @@ private:
...
@@ -1055,7 +1045,7 @@ private:
class
CudaCalcCustomCompoundBondForceKernel
:
public
CalcCustomCompoundBondForceKernel
{
class
CudaCalcCustomCompoundBondForceKernel
:
public
CalcCustomCompoundBondForceKernel
{
public:
public:
CudaCalcCustomCompoundBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomCompoundBondForceKernel
(
name
,
platform
),
CudaCalcCustomCompoundBondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomCompoundBondForceKernel
(
name
,
platform
),
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
system
(
system
)
{
cu
(
cu
),
params
(
NULL
),
system
(
system
)
{
}
}
~
CudaCalcCustomCompoundBondForceKernel
();
~
CudaCalcCustomCompoundBondForceKernel
();
/**
/**
...
@@ -1088,10 +1078,10 @@ private:
...
@@ -1088,10 +1078,10 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
ForceInfo
*
info
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
globals
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
const
System
&
system
;
const
System
&
system
;
};
};
...
@@ -1101,9 +1091,7 @@ private:
...
@@ -1101,9 +1091,7 @@ private:
class
CudaCalcCustomManyParticleForceKernel
:
public
CalcCustomManyParticleForceKernel
{
class
CudaCalcCustomManyParticleForceKernel
:
public
CalcCustomManyParticleForceKernel
{
public:
public:
CudaCalcCustomManyParticleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomManyParticleForceKernel
(
name
,
platform
),
CudaCalcCustomManyParticleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomManyParticleForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cu
(
cu
),
params
(
NULL
),
particleTypes
(
NULL
),
orderIndex
(
NULL
),
particleOrder
(
NULL
),
exclusions
(
NULL
),
hasInitializedKernel
(
false
),
cu
(
cu
),
params
(
NULL
),
system
(
system
)
{
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
neighborPairs
(
NULL
),
numNeighborPairs
(
NULL
),
neighborStartIndex
(
NULL
),
numNeighborsForAtom
(
NULL
),
neighbors
(
NULL
),
system
(
system
)
{
}
}
~
CudaCalcCustomManyParticleForceKernel
();
~
CudaCalcCustomManyParticleForceKernel
();
/**
/**
...
@@ -1138,21 +1126,21 @@ private:
...
@@ -1138,21 +1126,21 @@ private:
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
int
maxNeighborPairs
,
forceWorkgroupSize
,
findNeighborsWorkgroupSize
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
particleTypes
;
CudaArray
particleTypes
;
CudaArray
*
orderIndex
;
CudaArray
orderIndex
;
CudaArray
*
particleOrder
;
CudaArray
particleOrder
;
CudaArray
*
exclusions
;
CudaArray
exclusions
;
CudaArray
*
exclusionStartIndex
;
CudaArray
exclusionStartIndex
;
CudaArray
*
blockCenter
;
CudaArray
blockCenter
;
CudaArray
*
blockBoundingBox
;
CudaArray
blockBoundingBox
;
CudaArray
*
neighborPairs
;
CudaArray
neighborPairs
;
CudaArray
*
numNeighborPairs
;
CudaArray
numNeighborPairs
;
CudaArray
*
neighborStartIndex
;
CudaArray
neighborStartIndex
;
CudaArray
*
numNeighborsForAtom
;
CudaArray
numNeighborsForAtom
;
CudaArray
*
neighbors
;
CudaArray
neighbors
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
std
::
vector
<
void
*>
forceArgs
,
blockBoundsArgs
,
neighborsArgs
,
startIndicesArgs
,
copyPairsArgs
;
std
::
vector
<
void
*>
forceArgs
,
blockBoundsArgs
,
neighborsArgs
,
startIndicesArgs
,
copyPairsArgs
;
const
System
&
system
;
const
System
&
system
;
CUfunction
forceKernel
,
blockBoundsKernel
,
neighborsKernel
,
startIndicesKernel
,
copyPairsKernel
;
CUfunction
forceKernel
,
blockBoundsKernel
,
neighborsKernel
,
startIndicesKernel
,
copyPairsKernel
;
...
...
platforms/cuda/src/CudaBondedUtilities.cpp
View file @
f15f591b
...
@@ -84,8 +84,10 @@ void CudaBondedUtilities::initialize(const System& system) {
...
@@ -84,8 +84,10 @@ void CudaBondedUtilities::initialize(const System& system) {
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
{
int
numBonds
=
forceAtoms
[
i
].
size
();
int
numBonds
=
forceAtoms
[
i
].
size
();
int
numAtoms
=
forceAtoms
[
i
][
0
].
size
();
int
numAtoms
=
forceAtoms
[
i
][
0
].
size
();
int
numArrays
=
(
numAtoms
+
3
)
/
4
;
int
startAtom
=
0
;
int
startAtom
=
0
;
while
(
startAtom
<
numAtoms
)
{
atomIndices
[
i
].
resize
(
numArrays
);
for
(
int
j
=
0
;
j
<
numArrays
;
j
++
)
{
int
width
=
min
(
numAtoms
-
startAtom
,
4
);
int
width
=
min
(
numAtoms
-
startAtom
,
4
);
int
paddedWidth
=
(
width
==
3
?
4
:
width
);
int
paddedWidth
=
(
width
==
3
?
4
:
width
);
vector
<
unsigned
int
>
indexVec
(
paddedWidth
*
numBonds
);
vector
<
unsigned
int
>
indexVec
(
paddedWidth
*
numBonds
);
...
@@ -93,9 +95,8 @@ void CudaBondedUtilities::initialize(const System& system) {
...
@@ -93,9 +95,8 @@ void CudaBondedUtilities::initialize(const System& system) {
for
(
int
atom
=
0
;
atom
<
width
;
atom
++
)
for
(
int
atom
=
0
;
atom
<
width
;
atom
++
)
indexVec
[
bond
*
paddedWidth
+
atom
]
=
forceAtoms
[
i
][
bond
][
startAtom
+
atom
];
indexVec
[
bond
*
paddedWidth
+
atom
]
=
forceAtoms
[
i
][
bond
][
startAtom
+
atom
];
}
}
atomIndices
[
i
].
push_back
(
CudaArray
());
atomIndices
[
i
][
j
].
initialize
(
context
,
numBonds
,
4
*
paddedWidth
,
"bondedIndices"
);
atomIndices
[
i
].
back
().
initialize
(
context
,
numBonds
,
4
*
paddedWidth
,
"bondedIndices"
);
atomIndices
[
i
][
j
].
upload
(
&
indexVec
[
0
]);
atomIndices
[
i
].
back
().
upload
(
&
indexVec
[
0
]);
startAtom
+=
width
;
startAtom
+=
width
;
}
}
}
}
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
f15f591b
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -505,12 +505,6 @@ private:
...
@@ -505,12 +505,6 @@ private:
const HarmonicBondForce& force;
const HarmonicBondForce& force;
};
};
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
cu.setAsCurrent();
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int numContexts = cu.getPlatformData().contexts.size();
...
@@ -520,18 +514,18 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
...
@@ -520,18 +514,18 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
if (numBonds == 0)
if (numBonds == 0)
return;
return;
vector<vector<int> > atoms(numBonds, vector<int>(2));
vector<vector<int> > atoms(numBonds, vector<int>(2));
params
= CudaArray::creat
e<float2>(cu, numBonds, "bondParams");
params
.initializ
e<float2>(cu, numBonds, "bondParams");
vector<float2> paramVector(numBonds);
vector<float2> paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
for (int i = 0; i < numBonds; i++) {
double length, k;
double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = make_float2((float) length, (float) k);
paramVector[i] = make_float2((float) length, (float) k);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
->
getDevicePointer(), "float2");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
.
getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cu.addForce(info);
cu.addForce(info);
...
@@ -560,7 +554,7 @@ void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& conte
...
@@ -560,7 +554,7 @@ void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& conte
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = make_float2((float) length, (float) k);
paramVector[i] = make_float2((float) length, (float) k);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -600,8 +594,6 @@ CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
...
@@ -600,8 +594,6 @@ CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
cu.setAsCurrent();
cu.setAsCurrent();
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
}
}
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
...
@@ -649,9 +641,9 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
...
@@ -649,9 +641,9 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
variables[name] = "bondParams"+params->getParameterSuffix(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
string value = argName+"["+cu.intToString(i)+"]";
...
@@ -680,7 +672,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
...
@@ -680,7 +672,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
}
}
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -689,7 +681,7 @@ double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool include
...
@@ -689,7 +681,7 @@ double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool include
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -749,12 +741,6 @@ private:
...
@@ -749,12 +741,6 @@ private:
const HarmonicAngleForce& force;
const HarmonicAngleForce& force;
};
};
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
cu.setAsCurrent();
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int numContexts = cu.getPlatformData().contexts.size();
...
@@ -764,7 +750,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
...
@@ -764,7 +750,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
if (numAngles == 0)
if (numAngles == 0)
return;
return;
vector<vector<int> > atoms(numAngles, vector<int>(3));
vector<vector<int> > atoms(numAngles, vector<int>(3));
params
= CudaArray::creat
e<float2>(cu, numAngles, "angleParams");
params
.initializ
e<float2>(cu, numAngles, "angleParams");
vector<float2> paramVector(numAngles);
vector<float2> paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
for (int i = 0; i < numAngles; i++) {
double angle, k;
double angle, k;
...
@@ -772,11 +758,11 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
...
@@ -772,11 +758,11 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
paramVector[i] = make_float2((float) angle, (float) k);
paramVector[i] = make_float2((float) angle, (float) k);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
->
getDevicePointer(), "float2");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
.
getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cu.addForce(info);
cu.addForce(info);
...
@@ -805,7 +791,7 @@ void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& cont
...
@@ -805,7 +791,7 @@ void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& cont
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = make_float2((float) angle, (float) k);
paramVector[i] = make_float2((float) angle, (float) k);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -846,8 +832,6 @@ CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
...
@@ -846,8 +832,6 @@ CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
cu.setAsCurrent();
cu.setAsCurrent();
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
}
}
void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
...
@@ -895,9 +879,9 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
...
@@ -895,9 +879,9 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
variables[name] = "angleParams"+params->getParameterSuffix(i);
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
string value = argName+"["+cu.intToString(i)+"]";
...
@@ -926,7 +910,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
...
@@ -926,7 +910,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
}
}
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -935,7 +919,7 @@ double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includ
...
@@ -935,7 +919,7 @@ double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includ
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -996,12 +980,6 @@ private:
...
@@ -996,12 +980,6 @@ private:
const PeriodicTorsionForce& force;
const PeriodicTorsionForce& force;
};
};
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
cu.setAsCurrent();
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int numContexts = cu.getPlatformData().contexts.size();
...
@@ -1011,7 +989,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
...
@@ -1011,7 +989,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
if (numTorsions == 0)
if (numTorsions == 0)
return;
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params
= CudaArray::creat
e<float4>(cu, numTorsions, "periodicTorsionParams");
params
.initializ
e<float4>(cu, numTorsions, "periodicTorsionParams");
vector<float4> paramVector(numTorsions);
vector<float4> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
for (int i = 0; i < numTorsions; i++) {
int periodicity;
int periodicity;
...
@@ -1019,11 +997,11 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
...
@@ -1019,11 +997,11 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
->
getDevicePointer(), "float4");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params
.
getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cu.addForce(info);
cu.addForce(info);
...
@@ -1052,7 +1030,7 @@ void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& co
...
@@ -1052,7 +1030,7 @@ void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& co
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
}
params
->
upload(paramVector);
params
.
upload(paramVector);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -1087,14 +1065,6 @@ private:
...
@@ -1087,14 +1065,6 @@ private:
const RBTorsionForce& force;
const RBTorsionForce& force;
};
};
CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
cu.setAsCurrent();
if (params1 != NULL)
delete params1;
if (params2 != NULL)
delete params2;
}
void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
cu.setAsCurrent();
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int numContexts = cu.getPlatformData().contexts.size();
...
@@ -1104,8 +1074,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
...
@@ -1104,8 +1074,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
if (numTorsions == 0)
if (numTorsions == 0)
return;
return;
vector<vector<int> > atoms(numTorsions, vector<int>(4));
vector<vector<int> > atoms(numTorsions, vector<int>(4));
params1
= CudaArray::creat
e<float4>(cu, numTorsions, "rbTorsionParams1");
params1
.initializ
e<float4>(cu, numTorsions, "rbTorsionParams1");
params2
= CudaArray::creat
e<float2>(cu, numTorsions, "rbTorsionParams2");
params2
.initializ
e<float2>(cu, numTorsions, "rbTorsionParams2");
vector<float4> paramVector1(numTorsions);
vector<float4> paramVector1(numTorsions);
vector<float2> paramVector2(numTorsions);
vector<float2> paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) {
for (int i = 0; i < numTorsions; i++) {
...
@@ -1115,13 +1085,13 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
...
@@ -1115,13 +1085,13 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
paramVector2[i] = make_float2((float) c4, (float) c5);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
}
params1
->
upload(paramVector1);
params1
.
upload(paramVector1);
params2
->
upload(paramVector2);
params2
.
upload(paramVector2);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce;
replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce;
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1
->
getDevicePointer(), "float4");
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1
.
getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2
->
getDevicePointer(), "float2");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2
.
getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cu.addForce(info);
cu.addForce(info);
...
@@ -1152,8 +1122,8 @@ void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -1152,8 +1122,8 @@ void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context,
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
}
params1
->
upload(paramVector1);
params1
.
upload(paramVector1);
params2
->
upload(paramVector2);
params2
.
upload(paramVector2);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -1190,15 +1160,6 @@ private:
...
@@ -1190,15 +1160,6 @@ private:
const CMAPTorsionForce& force;
const CMAPTorsionForce& force;
};
};
CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
cu.setAsCurrent();
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int numContexts = cu.getPlatformData().contexts.size();
...
@@ -1230,17 +1191,17 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
...
@@ -1230,17 +1191,17 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
vector<int> torsionMapsVec(numTorsions);
vector<int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++)
for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients
= CudaArray::creat
e<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
coefficients
.initializ
e<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions
= CudaArray::creat
e<int2>(cu, numMaps, "cmapTorsionMapPositions");
mapPositions
.initializ
e<int2>(cu, numMaps, "cmapTorsionMapPositions");
torsionMaps
= CudaArray::creat
e<int>(cu, numTorsions, "cmapTorsionMaps");
torsionMaps
.initializ
e<int>(cu, numTorsions, "cmapTorsionMaps");
coefficients
->
upload(coeffVec);
coefficients
.
upload(coeffVec);
mapPositions
->
upload(mapPositionsVec);
mapPositions
.
upload(mapPositionsVec);
torsionMaps
->
upload(torsionMapsVec);
torsionMaps
.
upload(torsionMapsVec);
map<string, string> replacements;
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients
->
getDevicePointer(), "float4");
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients
.
getDevicePointer(), "float4");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions
->
getDevicePointer(), "int2");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions
.
getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps
->
getDevicePointer(), "int");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps
.
getDevicePointer(), "int");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
info = new ForceInfo(force);
cu.addForce(info);
cu.addForce(info);
...
@@ -1256,9 +1217,9 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -1256,9 +1217,9 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
numTorsions = endIndex-startIndex;
if (mapPositions
->
getSize() != numMaps)
if (mapPositions
.
getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed");
throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps
->
getSize() != numTorsions)
if (torsionMaps
.
getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps.
// Update the maps.
...
@@ -1281,7 +1242,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -1281,7 +1242,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
}
}
coefficients
->
upload(coeffVec);
coefficients
.
upload(coeffVec);
// Update the indices.
// Update the indices.
...
@@ -1290,7 +1251,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -1290,7 +1251,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
int index[8];
int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
}
}
torsionMaps
->
upload(torsionMapsVec);
torsionMaps
.
upload(torsionMapsVec);
}
}
class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
...
@@ -1327,8 +1288,6 @@ private:
...
@@ -1327,8 +1288,6 @@ private:
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
}
}
void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
...
@@ -1376,9 +1335,9 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
...
@@ -1376,9 +1335,9 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
variables[name] = "torsionParams"+params->getParameterSuffix(i);
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
string value = argName+"["+cu.intToString(i)+"]";
...
@@ -1407,7 +1366,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
...
@@ -1407,7 +1366,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
}
}
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -1416,7 +1375,7 @@ double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool incl
...
@@ -1416,7 +1375,7 @@ double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool incl
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -1576,34 +1535,6 @@ private:
...
@@ -1576,34 +1535,6 @@ private:
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
cu.setAsCurrent();
cu.setAsCurrent();
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (directPmeGrid != NULL)
delete directPmeGrid;
if (reciprocalPmeGrid != NULL)
delete reciprocalPmeGrid;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (sort != NULL)
if (sort != NULL)
delete sort;
delete sort;
if (fft != NULL)
if (fft != NULL)
...
@@ -1647,7 +1578,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1647,7 +1578,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize nonbonded interactions.
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
int numParticles = force.getNumParticles();
sigmaEpsilon
= CudaArray::creat
e<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon
.initializ
e<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
CudaArray& posq = cu.getPosq();
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
float4* posqf = (float4*) &temp[0];
...
@@ -1682,7 +1613,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1682,7 +1613,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exclusionList[exclusion.second].push_back(exclusion.first);
exclusionList[exclusion.second].push_back(exclusion.first);
}
}
posq.upload(&temp[0]);
posq.upload(&temp[0]);
sigmaEpsilon
->
upload(sigmaEpsilonVector);
sigmaEpsilon
.
upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
...
@@ -1740,7 +1671,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1740,7 +1671,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
cosSinSums
= new CudaArray
(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
cosSinSums
.initialize
(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
}
}
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
...
@@ -1844,22 +1775,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1844,22 +1775,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME)
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
directPmeGrid
= new CudaArray
(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
directPmeGrid
.initialize
(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid
= new CudaArray
(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
reciprocalPmeGrid
.initialize
(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(
*
directPmeGrid);
cu.addAutoclearBuffer(directPmeGrid);
pmeBsplineModuliX
= new CudaArray
(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliX
.initialize
(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY
= new CudaArray
(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliY
.initialize
(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ
= new CudaArray
(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeBsplineModuliZ
.initialize
(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
if (doLJPME) {
pmeDispersionBsplineModuliX
= new CudaArray
(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliX
.initialize
(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY
= new CudaArray
(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliY
.initialize
(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ
= new CudaArray
(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
pmeDispersionBsplineModuliZ
.initialize
(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
}
pmeAtomRange
= CudaArray::creat
e<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomRange
.initializ
e<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex
= CudaArray::creat
e<int2>(cu, numParticles, "pmeAtomGridIndex");
pmeAtomGridIndex
.initializ
e<int2>(cu, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer
= new CudaArray
(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
pmeEnergyBuffer
.initialize
(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(
*
pmeEnergyBuffer);
cu.clearBuffer(pmeEnergyBuffer);
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
int cufftVersion;
int cufftVersion;
cufftGetVersion(&cufftVersion);
cufftGetVersion(&cufftVersion);
...
@@ -1903,7 +1834,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1903,7 +1834,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (recipForceGroup < 0)
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
recipForceGroup = force.getForceGroup();
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"),
*
pmeEnergyBuffer, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), pmeEnergyBuffer, recipForceGroup));
}
}
hasInitializedFFT = true;
hasInitializedFFT = true;
...
@@ -1916,9 +1847,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1916,9 +1847,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
xsize = gridSizeX;
xsize = gridSizeX;
ysize = gridSizeY;
ysize = gridSizeY;
zsize = gridSizeZ;
zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX;
xmoduli =
&
pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY;
ymoduli =
&
pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ;
zmoduli =
&
pmeBsplineModuliZ;
}
}
else {
else {
if (!doLJPME)
if (!doLJPME)
...
@@ -1926,9 +1857,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1926,9 +1857,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
xsize = dispersionGridSizeX;
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX;
xmoduli =
&
pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY;
ymoduli =
&
pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ;
zmoduli =
&
pmeDispersionBsplineModuliZ;
}
}
int maxSize = max(max(xsize, ysize), zsize);
int maxSize = max(max(xsize, ysize), zsize);
vector<double> data(PmeOrder);
vector<double> data(PmeOrder);
...
@@ -2008,7 +1939,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -2008,7 +1939,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon
->
getDevicePointer()));
sizeof(float2), sigmaEpsilon
.
getDevicePointer()));
// Initialize the exceptions.
// Initialize the exceptions.
...
@@ -2019,7 +1950,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -2019,7 +1950,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (numExceptions > 0) {
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams
= CudaArray::creat
e<float4>(cu, numExceptions, "exceptionParams");
exceptionParams
.initializ
e<float4>(cu, numExceptions, "exceptionParams");
vector<float4> exceptionParamsVector(numExceptions);
vector<float4> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
double chargeProd, sigma, epsilon;
...
@@ -2027,9 +1958,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -2027,9 +1958,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
}
exceptionParams
->
upload(exceptionParamsVector);
exceptionParams
.
upload(exceptionParamsVector);
map<string, string> replacements;
map<string, string> replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams
->
getDevicePointer(), "float4");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams
.
getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
}
info = new ForceInfo(force);
info = new ForceInfo(force);
...
@@ -2037,13 +1968,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -2037,13 +1968,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
}
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
if (cosSinSums
!= NULL
&& includeReciprocal) {
if (cosSinSums
.isInitialized()
&& includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums
->
getDevicePointer(), cu.getPeriodicBoxSizePointer()};
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums
.
getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums
->
getSize());
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums
.
getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums
->
getDevicePointer(), cu.getPeriodicBoxSizePointer()};
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums
.
getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
}
}
if (directPmeGrid
!= NULL
&& includeReciprocal) {
if (directPmeGrid
.isInitialized()
&& includeReciprocal) {
if (usePmeStream)
if (usePmeStream)
cu.setCurrentStream(pmeStream);
cu.setCurrentStream(pmeStream);
...
@@ -2075,118 +2006,118 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -2075,118 +2006,118 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
// Execute the reciprocal space kernels.
// Execute the reciprocal space kernels.
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(
*
pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
->
getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
.
getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid
->
getDevicePointer()};
void* finishSpreadArgs[] = {&directPmeGrid
.
getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
}
}
if (useCudaFFT) {
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
if (cu.getUseDoublePrecision())
cufftExecD2Z(fftForward, (double*) directPmeGrid
->
getDevicePointer(), (double2*) reciprocalPmeGrid
->
getDevicePointer());
cufftExecD2Z(fftForward, (double*) directPmeGrid
.
getDevicePointer(), (double2*) reciprocalPmeGrid
.
getDevicePointer());
else
else
cufftExecR2C(fftForward, (float*) directPmeGrid
->
getDevicePointer(), (float2*) reciprocalPmeGrid
->
getDevicePointer());
cufftExecR2C(fftForward, (float*) directPmeGrid
.
getDevicePointer(), (float2*) reciprocalPmeGrid
.
getDevicePointer());
}
}
else {
else {
fft->execFFT(
*
directPmeGrid,
*
reciprocalPmeGrid, true);
fft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
}
}
if (includeEnergy) {
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid
->
getDevicePointer(), usePmeStream ? &pmeEnergyBuffer
->
getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
void* computeEnergyArgs[] = {&reciprocalPmeGrid
.
getDevicePointer(), usePmeStream ? &pmeEnergyBuffer
.
getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX
->
getDevicePointer(), &pmeBsplineModuliY
->
getDevicePointer(), &pmeBsplineModuliZ
->
getDevicePointer(),
&pmeBsplineModuliX
.
getDevicePointer(), &pmeBsplineModuliY
.
getDevicePointer(), &pmeBsplineModuliZ
.
getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
}
}
void* convolutionArgs[] = {&reciprocalPmeGrid
->
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
void* convolutionArgs[] = {&reciprocalPmeGrid
.
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX
->
getDevicePointer(), &pmeBsplineModuliY
->
getDevicePointer(), &pmeBsplineModuliZ
->
getDevicePointer(),
&pmeBsplineModuliX
.
getDevicePointer(), &pmeBsplineModuliY
.
getDevicePointer(), &pmeBsplineModuliZ
.
getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
if (useCudaFFT) {
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
if (cu.getUseDoublePrecision())
cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid
->
getDevicePointer(), (double*) directPmeGrid
->
getDevicePointer());
cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid
.
getDevicePointer(), (double*) directPmeGrid
.
getDevicePointer());
else
else
cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid
->
getDevicePointer(), (float*) directPmeGrid
->
getDevicePointer());
cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid
.
getDevicePointer(), (float*) directPmeGrid
.
getDevicePointer());
}
}
else {
else {
fft->execFFT(
*
reciprocalPmeGrid,
*
directPmeGrid, false);
fft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
}
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
->
getDevicePointer()};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
.
getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// now we assume that LJPME can only be used if electrostatic PME is also active.
// now we assume that LJPME can only be used if electrostatic PME is also active.
if (doLJPME) {
if (doLJPME) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(
*
pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
cu.clearBuffer(
*
directPmeGrid);
cu.clearBuffer(directPmeGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
->
getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
.
getDevicePointer(),
&sigmaEpsilon
->
getDevicePointer()};
&sigmaEpsilon
.
getDevicePointer()};
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid
->
getDevicePointer()};
void* finishSpreadArgs[] = {&directPmeGrid
.
getDevicePointer()};
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
}
}
if (useCudaFFT) {
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
if (cu.getUseDoublePrecision())
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid
->
getDevicePointer(), (double2*) reciprocalPmeGrid
->
getDevicePointer());
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid
.
getDevicePointer(), (double2*) reciprocalPmeGrid
.
getDevicePointer());
else
else
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid
->
getDevicePointer(), (float2*) reciprocalPmeGrid
->
getDevicePointer());
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid
.
getDevicePointer(), (float2*) reciprocalPmeGrid
.
getDevicePointer());
}
}
else {
else {
dispersionFft->execFFT(
*
directPmeGrid,
*
reciprocalPmeGrid, true);
dispersionFft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
}
}
if (includeEnergy) {
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid
->
getDevicePointer(), usePmeStream ? &pmeEnergyBuffer
->
getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
void* computeEnergyArgs[] = {&reciprocalPmeGrid
.
getDevicePointer(), usePmeStream ? &pmeEnergyBuffer
.
getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX
->
getDevicePointer(), &pmeDispersionBsplineModuliY
->
getDevicePointer(), &pmeDispersionBsplineModuliZ
->
getDevicePointer(),
&pmeDispersionBsplineModuliX
.
getDevicePointer(), &pmeDispersionBsplineModuliY
.
getDevicePointer(), &pmeDispersionBsplineModuliZ
.
getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
}
void* convolutionArgs[] = {&reciprocalPmeGrid
->
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
void* convolutionArgs[] = {&reciprocalPmeGrid
.
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX
->
getDevicePointer(), &pmeDispersionBsplineModuliY
->
getDevicePointer(), &pmeDispersionBsplineModuliZ
->
getDevicePointer(),
&pmeDispersionBsplineModuliX
.
getDevicePointer(), &pmeDispersionBsplineModuliY
.
getDevicePointer(), &pmeDispersionBsplineModuliZ
.
getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
if (useCudaFFT) {
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
if (cu.getUseDoublePrecision())
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid
->
getDevicePointer(), (double*) directPmeGrid
->
getDevicePointer());
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid
.
getDevicePointer(), (double*) directPmeGrid
.
getDevicePointer());
else
else
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid
->
getDevicePointer(), (float*) directPmeGrid
->
getDevicePointer());
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid
.
getDevicePointer(), (float*) directPmeGrid
.
getDevicePointer());
}
}
else {
else {
dispersionFft->execFFT(
*
reciprocalPmeGrid,
*
directPmeGrid, false);
dispersionFft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
}
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid
->
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid
.
getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
->
getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex
.
getDevicePointer(),
&sigmaEpsilon
->
getDevicePointer()};
&sigmaEpsilon
.
getDevicePointer()};
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
}
if (usePmeStream) {
if (usePmeStream) {
...
@@ -2253,7 +2184,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -2253,7 +2184,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
sumSquaredCharges += charge*charge;
sumSquaredCharges += charge*charge;
}
}
cu.setCharges(chargeVector);
cu.setCharges(chargeVector);
sigmaEpsilon
->
upload(sigmaEpsilonVector);
sigmaEpsilon
.
upload(sigmaEpsilonVector);
// Record the exceptions.
// Record the exceptions.
...
@@ -2265,7 +2196,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -2265,7 +2196,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
}
}
exceptionParams
->
upload(exceptionParamsVector);
exceptionParams
.
upload(exceptionParamsVector);
}
}
// Compute other values.
// Compute other values.
...
@@ -2355,12 +2286,6 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
...
@@ -2355,12 +2286,6 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
cu.setAsCurrent();
cu.setAsCurrent();
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (auto function : tabulatedFunctions)
delete function;
if (forceCopy != NULL)
if (forceCopy != NULL)
delete forceCopy;
delete forceCopy;
}
}
...
@@ -2377,7 +2302,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
...
@@ -2377,7 +2302,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
int numParticles = force.getNumParticles();
int numParticles = force.getNumParticles();
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
if (force.getNumGlobalParameters() > 0)
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
vector<vector<float> > paramVector(numParticles);
vector<vector<float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
for (int i = 0; i < numParticles; i++) {
...
@@ -2402,6 +2327,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
...
@@ -2402,6 +2327,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
vector<string> tableTypes;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
...
@@ -2410,9 +2336,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
...
@@ -2410,9 +2336,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions[
i].
upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
tabulatedFunctions.size()-1]->
getDevicePointer()));
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
i].
getDevicePointer()));
if (width == 1)
if (width == 1)
tableTypes.push_back("float");
tableTypes.push_back("float");
else
else
...
@@ -2427,8 +2353,8 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
...
@@ -2427,8 +2353,8 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
}
if (globals
!= NULL
)
if (globals
.isInitialized()
)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
...
@@ -2482,9 +2408,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
...
@@ -2482,9 +2408,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals
->
getDevicePointer()));
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals
.
getDevicePointer()));
}
}
}
}
info = new ForceInfo(force);
info = new ForceInfo(force);
...
@@ -2660,8 +2586,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
...
@@ -2660,8 +2586,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
for (; indexInTileSet < 32; indexInTileSet++)
for (; indexInTileSet < 32; indexInTileSet++)
groupData.push_back(make_int4(0, 0, 0, 0));
groupData.push_back(make_int4(0, 0, 0, 0));
}
}
interactionGroupData
= CudaArray::creat
e<int4>(cu, groupData.size(), "interactionGroupData");
interactionGroupData
.initializ
e<int4>(cu, groupData.size(), "interactionGroupData");
interactionGroupData
->
upload(groupData);
interactionGroupData
.
upload(groupData);
// Create the kernel.
// Create the kernel.
...
@@ -2687,7 +2613,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
...
@@ -2687,7 +2613,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
args<<", const "<<buffers[i].getType()<<"* __restrict__ global_params"<<(i+1);
args<<", const "<<buffers[i].getType()<<"* __restrict__ global_params"<<(i+1);
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
if (globals
!= NULL
)
if (globals
.isInitialized()
)
args<<", const float* __restrict__ globals";
args<<", const float* __restrict__ globals";
if (hasParamDerivs)
if (hasParamDerivs)
args << ", mixed* __restrict__ energyParamDerivs";
args << ", mixed* __restrict__ energyParamDerivs";
...
@@ -2758,7 +2684,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
...
@@ -2758,7 +2684,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
}
}
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -2767,7 +2693,7 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
...
@@ -2767,7 +2693,7 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed) {
if (changed) {
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
if (forceCopy != NULL) {
if (forceCopy != NULL) {
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
hasInitializedLongRangeCorrection = true;
...
@@ -2778,13 +2704,13 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
...
@@ -2778,13 +2704,13 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
hasInitializedLongRangeCorrection = true;
}
}
if (interactionGroupData
!= NULL
) {
if (interactionGroupData
.isInitialized()
) {
if (!hasInitializedKernel) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
hasInitializedKernel = true;
interactionGroupArgs.push_back(&cu.getForce().getDevicePointer());
interactionGroupArgs.push_back(&cu.getForce().getDevicePointer());
interactionGroupArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
interactionGroupArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
interactionGroupArgs.push_back(&cu.getPosq().getDevicePointer());
interactionGroupArgs.push_back(&cu.getPosq().getDevicePointer());
interactionGroupArgs.push_back(&interactionGroupData
->
getDevicePointer());
interactionGroupArgs.push_back(&interactionGroupData
.
getDevicePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getInvPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getInvPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecXPointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecXPointer());
...
@@ -2792,10 +2718,10 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
...
@@ -2792,10 +2718,10 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs.push_back(cu.getPeriodicBoxVecZPointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecZPointer());
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
interactionGroupArgs.push_back(&buffer.getMemory());
interactionGroupArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
interactionGroupArgs.push_back(&function
->
getDevicePointer());
interactionGroupArgs.push_back(&function
.
getDevicePointer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
interactionGroupArgs.push_back(&globals
->
getDevicePointer());
interactionGroupArgs.push_back(&globals
.
getDevicePointer());
if (hasParamDerivs)
if (hasParamDerivs)
interactionGroupArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
interactionGroupArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
}
}
...
@@ -2855,38 +2781,24 @@ private:
...
@@ -2855,38 +2781,24 @@ private:
const GBSAOBCForce& force;
const GBSAOBCForce& force;
};
};
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (bornSum != NULL)
delete bornSum;
if (bornRadii != NULL)
delete bornRadii;
if (bornForce != NULL)
delete bornForce;
if (obcChain != NULL)
delete obcChain;
}
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
cu.setAsCurrent();
cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1)
if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
params
= CudaArray::creat
e<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
params
.initializ
e<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
if (cu.getUseDoublePrecision()) {
if (cu.getUseDoublePrecision()) {
bornRadii
= CudaArray::creat
e<double>(cu, cu.getPaddedNumAtoms(), "bornRadii");
bornRadii
.initializ
e<double>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain
= CudaArray::creat
e<double>(cu, cu.getPaddedNumAtoms(), "obcChain");
obcChain
.initializ
e<double>(cu, cu.getPaddedNumAtoms(), "obcChain");
}
}
else {
else {
bornRadii
= CudaArray::creat
e<float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
bornRadii
.initializ
e<float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain
= CudaArray::creat
e<float>(cu, cu.getPaddedNumAtoms(), "obcChain");
obcChain
.initializ
e<float>(cu, cu.getPaddedNumAtoms(), "obcChain");
}
}
bornSum
= CudaArray::creat
e<long long>(cu, cu.getPaddedNumAtoms(), "bornSum");
bornSum
.initializ
e<long long>(cu, cu.getPaddedNumAtoms(), "bornSum");
bornForce
= CudaArray::creat
e<long long>(cu, cu.getPaddedNumAtoms(), "bornForce");
bornForce
.initializ
e<long long>(cu, cu.getPaddedNumAtoms(), "bornForce");
cu.addAutoclearBuffer(
*
bornSum);
cu.addAutoclearBuffer(bornSum);
cu.addAutoclearBuffer(
*
bornForce);
cu.addAutoclearBuffer(bornForce);
CudaArray& posq = cu.getPosq();
CudaArray& posq = cu.getPosq();
vector<double4> temp(posq.getSize());
vector<double4> temp(posq.getSize());
float4* posqf = (float4*) &temp[0];
float4* posqf = (float4*) &temp[0];
...
@@ -2904,7 +2816,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
...
@@ -2904,7 +2816,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
posqf[i] = make_float4(0, 0, 0, (float) charge);
posqf[i] = make_float4(0, 0, 0, (float) charge);
}
}
posq.upload(&temp[0]);
posq.upload(&temp[0]);
params
->
upload(paramsVector);
params
.
upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
...
@@ -2912,8 +2824,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
...
@@ -2912,8 +2824,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
cutoff = force.getCutoffDistance();
cutoff = force.getCutoffDistance();
string source = CudaKernelSources::gbsaObc2;
string source = CudaKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params
->
getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params
.
getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce
->
getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce
.
getDevicePointer()));
info = new ForceInfo(force);
info = new ForceInfo(force);
cu.addForce(info);
cu.addForce(info);
}
}
...
@@ -2951,9 +2863,9 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
...
@@ -2951,9 +2863,9 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
map<string, string> replacements;
map<string, string> replacements;
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::gbsaObc1, replacements), defines);
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::gbsaObc1, replacements), defines);
computeBornSumKernel = cu.getKernel(module, "computeBornSum");
computeBornSumKernel = cu.getKernel(module, "computeBornSum");
computeSumArgs.push_back(&bornSum
->
getDevicePointer());
computeSumArgs.push_back(&bornSum
.
getDevicePointer());
computeSumArgs.push_back(&cu.getPosq().getDevicePointer());
computeSumArgs.push_back(&cu.getPosq().getDevicePointer());
computeSumArgs.push_back(¶ms
->
getDevicePointer());
computeSumArgs.push_back(¶ms
.
getDevicePointer());
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
computeSumArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer());
...
@@ -2972,10 +2884,10 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
...
@@ -2972,10 +2884,10 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
computeSumArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
computeSumArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
force1Kernel = cu.getKernel(module, "computeGBSAForce1");
force1Kernel = cu.getKernel(module, "computeGBSAForce1");
force1Args.push_back(&cu.getForce().getDevicePointer());
force1Args.push_back(&cu.getForce().getDevicePointer());
force1Args.push_back(&bornForce
->
getDevicePointer());
force1Args.push_back(&bornForce
.
getDevicePointer());
force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
force1Args.push_back(&cu.getPosq().getDevicePointer());
force1Args.push_back(&cu.getPosq().getDevicePointer());
force1Args.push_back(&bornRadii
->
getDevicePointer());
force1Args.push_back(&bornRadii
.
getDevicePointer());
force1Args.push_back(NULL);
force1Args.push_back(NULL);
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
...
@@ -3008,12 +2920,12 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
...
@@ -3008,12 +2920,12 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
}
}
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
float alpha = 1.0f, beta = 0.8f, gamma = 4.85f;
float alpha = 1.0f, beta = 0.8f, gamma = 4.85f;
void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum
->
getDevicePointer(), ¶ms
->
getDevicePointer(),
void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum
.
getDevicePointer(), ¶ms
.
getDevicePointer(),
&bornRadii
->
getDevicePointer(), &obcChain
->
getDevicePointer()};
&bornRadii
.
getDevicePointer(), &obcChain
.
getDevicePointer()};
cu.executeKernel(reduceBornSumKernel, reduceSumArgs, cu.getPaddedNumAtoms());
cu.executeKernel(reduceBornSumKernel, reduceSumArgs, cu.getPaddedNumAtoms());
cu.executeKernel(force1Kernel, &force1Args[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
cu.executeKernel(force1Kernel, &force1Args[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
void* reduceForceArgs[] = {&bornForce
->
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), ¶ms
->
getDevicePointer(),
void* reduceForceArgs[] = {&bornForce
.
getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), ¶ms
.
getDevicePointer(),
&bornRadii
->
getDevicePointer(), &obcChain
->
getDevicePointer()};
&bornRadii
.
getDevicePointer(), &obcChain
.
getDevicePointer()};
cu.executeKernel(reduceBornForceKernel, &reduceForceArgs[0], cu.getPaddedNumAtoms());
cu.executeKernel(reduceBornForceKernel, &reduceForceArgs[0], cu.getPaddedNumAtoms());
return 0.0;
return 0.0;
}
}
...
@@ -3039,7 +2951,7 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
...
@@ -3039,7 +2951,7 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
}
}
cu.setCharges(chargeVector);
cu.setCharges(chargeVector);
params
->
upload(paramsVector);
params
.
upload(paramsVector);
// Mark that the current reordering may be invalid.
// Mark that the current reordering may be invalid.
...
@@ -3087,16 +2999,6 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
...
@@ -3087,16 +2999,6 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
delete energyDerivs;
delete energyDerivs;
if (energyDerivChain != NULL)
if (energyDerivChain != NULL)
delete energyDerivChain;
delete energyDerivChain;
if (longEnergyDerivs != NULL)
delete longEnergyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
for (auto function : tabulatedFunctions)
delete function;
for (auto d : dValue0dParam)
delete d;
for (auto d : dValuedParam)
for (auto d : dValuedParam)
delete d;
delete d;
}
}
...
@@ -3135,7 +3037,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3135,7 +3037,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cu.getUseDoublePrecision());
computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cu.getUseDoublePrecision());
if (force.getNumGlobalParameters() > 0)
if (force.getNumGlobalParameters() > 0)
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
vector<vector<float> > paramVector(paddedNumParticles, vector<float>(numParams, 0));
vector<vector<float> > paramVector(paddedNumParticles, vector<float>(numParams, 0));
vector<vector<int> > exclusionList(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
for (int i = 0; i < numParticles; i++) {
...
@@ -3159,6 +3061,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3159,6 +3061,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
...
@@ -3167,9 +3070,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3167,9 +3070,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions[
i].
upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
tabulatedFunctions.size()-1]->
getDevicePointer()));
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[
i].
getDevicePointer()));
tableArgs << ", const float";
tableArgs << ", const float";
if (width > 1)
if (width > 1)
tableArgs << width;
tableArgs << width;
...
@@ -3184,8 +3087,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3184,8 +3087,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
}
if (globals
!= NULL
)
if (globals
.isInitialized()
)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms.
// Record derivatives of expressions needed for the chain rule terms.
...
@@ -3233,15 +3136,16 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3233,15 +3136,16 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).optimize());
energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).optimize());
}
}
longEnergyDerivs
= CudaArray::creat
e<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
longEnergyDerivs
.initializ
e<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
energyDerivChain = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
energyDerivChain = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
dValue0dParam.resize(force.getNumEnergyParameterDerivatives());
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
dValuedParam.push_back(new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "dValuedParam", true, cu.getUseDoublePrecision()));
dValuedParam.push_back(new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "dValuedParam", true, cu.getUseDoublePrecision()));
dValue0dParam
.push_back(CudaArray::creat
e<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam")
)
;
dValue0dParam
[i].initializ
e<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam");
cu.addAutoclearBuffer(
*
dValue0dParam
.back()
);
cu.addAutoclearBuffer(dValue0dParam
[i]
);
string name = force.getEnergyParameterDerivativeName(i);
string name = force.getEnergyParameterDerivativeName(i);
cu.addEnergyParameterDerivative(name);
cu.addEnergyParameterDerivative(name);
}
}
...
@@ -3826,9 +3730,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3826,9 +3730,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
}
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals
->
getDevicePointer()));
arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals
.
getDevicePointer()));
}
}
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (auto param : parameters)
for (auto param : parameters)
...
@@ -3838,7 +3742,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3838,7 +3742,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
}
}
info = new ForceInfo(force);
info = new ForceInfo(force);
cu.addForce(info);
cu.addForce(info);
cu.addAutoclearBuffer(
*
longEnergyDerivs);
cu.addAutoclearBuffer(longEnergyDerivs);
}
}
double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
...
@@ -3881,13 +3785,13 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -3881,13 +3785,13 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
// Set arguments for kernels.
// Set arguments for kernels.
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
valueBuffers
= CudaArray::creat
e<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
valueBuffers
.initializ
e<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
cu.addAutoclearBuffer(
*
valueBuffers);
cu.addAutoclearBuffer(valueBuffers);
cu.clearBuffer(valueBuffers
->
getDevicePointer(), sizeof(long long)*valueBuffers
->
getSize());
cu.clearBuffer(valueBuffers
.
getDevicePointer(), sizeof(long long)*valueBuffers
.
getSize());
pairValueArgs.push_back(&cu.getPosq().getDevicePointer());
pairValueArgs.push_back(&cu.getPosq().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionTiles().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionTiles().getDevicePointer());
pairValueArgs.push_back(&valueBuffers
->
getDevicePointer());
pairValueArgs.push_back(&valueBuffers
.
getDevicePointer());
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
pairValueArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
pairValueArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer());
pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer());
...
@@ -3903,31 +3807,31 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -3903,31 +3807,31 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
}
}
else
else
pairValueArgs.push_back(&maxTiles);
pairValueArgs.push_back(&maxTiles);
if (globals
!= NULL
)
if (globals
.isInitialized()
)
pairValueArgs.push_back(&globals
->
getDevicePointer());
pairValueArgs.push_back(&globals
.
getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairValueUsesParam[i])
if (pairValueUsesParam[i])
pairValueArgs.push_back(¶ms->getBuffers()[i].getMemory());
pairValueArgs.push_back(¶ms->getBuffers()[i].getMemory());
}
}
for (auto d : dValue0dParam)
for (auto
&
d : dValue0dParam)
pairValueArgs.push_back(&d
->
getDevicePointer());
pairValueArgs.push_back(&d
.
getDevicePointer());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
pairValueArgs.push_back(&function
->
getDevicePointer());
pairValueArgs.push_back(&function
.
getDevicePointer());
perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer());
perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer());
perParticleValueArgs.push_back(&valueBuffers
->
getDevicePointer());
perParticleValueArgs.push_back(&valueBuffers
.
getDevicePointer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
perParticleValueArgs.push_back(&globals
->
getDevicePointer());
perParticleValueArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
perParticleValueArgs.push_back(&buffer.getMemory());
perParticleValueArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
for (auto& buffer : computedValues->getBuffers())
perParticleValueArgs.push_back(&buffer.getMemory());
perParticleValueArgs.push_back(&buffer.getMemory());
for (int i = 0; i < dValuedParam.size(); i++) {
for (int i = 0; i < dValuedParam.size(); i++) {
perParticleValueArgs.push_back(&dValue0dParam[i]
->
getDevicePointer());
perParticleValueArgs.push_back(&dValue0dParam[i]
.
getDevicePointer());
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
perParticleValueArgs.push_back(&dValuedParam[i]->getBuffers()[j].getMemory());
perParticleValueArgs.push_back(&dValuedParam[i]->getBuffers()[j].getMemory());
}
}
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
perParticleValueArgs.push_back(&function
->
getDevicePointer());
perParticleValueArgs.push_back(&function
.
getDevicePointer());
pairEnergyArgs.push_back(&cu.getForce().getDevicePointer());
pairEnergyArgs.push_back(&cu.getForce().getDevicePointer());
pairEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
pairEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
...
@@ -3949,8 +3853,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -3949,8 +3853,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
}
}
else
else
pairEnergyArgs.push_back(&maxTiles);
pairEnergyArgs.push_back(&maxTiles);
if (globals
!= NULL
)
if (globals
.isInitialized()
)
pairEnergyArgs.push_back(&globals
->
getDevicePointer());
pairEnergyArgs.push_back(&globals
.
getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairEnergyUsesParam[i])
if (pairEnergyUsesParam[i])
pairEnergyArgs.push_back(¶ms->getBuffers()[i].getMemory());
pairEnergyArgs.push_back(¶ms->getBuffers()[i].getMemory());
...
@@ -3959,16 +3863,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -3959,16 +3863,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
if (pairEnergyUsesValue[i])
if (pairEnergyUsesValue[i])
pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
}
}
pairEnergyArgs.push_back(&longEnergyDerivs
->
getDevicePointer());
pairEnergyArgs.push_back(&longEnergyDerivs
.
getDevicePointer());
if (needEnergyParamDerivs)
if (needEnergyParamDerivs)
pairEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
pairEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
pairEnergyArgs.push_back(&function
->
getDevicePointer());
pairEnergyArgs.push_back(&function
.
getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
perParticleEnergyArgs.push_back(&globals
->
getDevicePointer());
perParticleEnergyArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
perParticleEnergyArgs.push_back(&buffer.getMemory());
perParticleEnergyArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
for (auto& buffer : computedValues->getBuffers())
...
@@ -3977,16 +3881,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -3977,16 +3881,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
perParticleEnergyArgs.push_back(&buffer.getMemory());
perParticleEnergyArgs.push_back(&buffer.getMemory());
for (auto& buffer : energyDerivChain->getBuffers())
for (auto& buffer : energyDerivChain->getBuffers())
perParticleEnergyArgs.push_back(&buffer.getMemory());
perParticleEnergyArgs.push_back(&buffer.getMemory());
perParticleEnergyArgs.push_back(&longEnergyDerivs
->
getDevicePointer());
perParticleEnergyArgs.push_back(&longEnergyDerivs
.
getDevicePointer());
if (needEnergyParamDerivs)
if (needEnergyParamDerivs)
perParticleEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
perParticleEnergyArgs.push_back(&function
->
getDevicePointer());
perParticleEnergyArgs.push_back(&function
.
getDevicePointer());
if (needParameterGradient || needEnergyParamDerivs) {
if (needParameterGradient || needEnergyParamDerivs) {
gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer());
gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer());
gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer());
gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
gradientChainRuleArgs.push_back(&globals
->
getDevicePointer());
gradientChainRuleArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
gradientChainRuleArgs.push_back(&buffer.getMemory());
gradientChainRuleArgs.push_back(&buffer.getMemory());
for (auto& buffer : computedValues->getBuffers())
for (auto& buffer : computedValues->getBuffers())
...
@@ -4001,7 +3905,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -4001,7 +3905,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
}
}
}
}
}
}
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -4010,7 +3914,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -4010,7 +3914,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
pairEnergyArgs[5] = &includeEnergy;
pairEnergyArgs[5] = &includeEnergy;
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
...
@@ -4089,8 +3993,6 @@ CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
...
@@ -4089,8 +3993,6 @@ CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
cu.setAsCurrent();
cu.setAsCurrent();
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
}
}
void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
...
@@ -4146,9 +4048,9 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
...
@@ -4146,9 +4048,9 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
variables[name] = "particleParams"+params->getParameterSuffix(i);
variables[name] = "particleParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
string value = argName+"["+cu.intToString(i)+"]";
...
@@ -4170,7 +4072,7 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
...
@@ -4170,7 +4072,7 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
}
}
double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -4179,7 +4081,7 @@ double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool inc
...
@@ -4179,7 +4081,7 @@ double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool inc
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -4294,18 +4196,6 @@ CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
...
@@ -4294,18 +4196,6 @@ CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
delete donorParams;
delete donorParams;
if (acceptorParams != NULL)
if (acceptorParams != NULL)
delete acceptorParams;
delete acceptorParams;
if (donors != NULL)
delete donors;
if (acceptors != NULL)
delete acceptors;
if (globals != NULL)
delete globals;
if (donorExclusions != NULL)
delete donorExclusions;
if (acceptorExclusions != NULL)
delete acceptorExclusions;
for (auto function : tabulatedFunctions)
delete function;
}
}
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
...
@@ -4333,12 +4223,12 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
...
@@ -4333,12 +4223,12 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
if (numDonors == 0 || numAcceptors == 0)
if (numDonors == 0 || numAcceptors == 0)
return;
return;
int numParticles = system.getNumParticles();
int numParticles = system.getNumParticles();
donors
= CudaArray::creat
e<int4>(cu, numDonors, "customHbondDonors");
donors
.initializ
e<int4>(cu, numDonors, "customHbondDonors");
acceptors
= CudaArray::creat
e<int4>(cu, numAcceptors, "customHbondAcceptors");
acceptors
.initializ
e<int4>(cu, numAcceptors, "customHbondAcceptors");
donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
if (force.getNumGlobalParameters() > 0)
if (force.getNumGlobalParameters() > 0)
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
vector<vector<float> > donorParamVector(numDonors);
vector<vector<float> > donorParamVector(numDonors);
vector<int4> donorVector(numDonors);
vector<int4> donorVector(numDonors);
for (int i = 0; i < numDonors; i++) {
for (int i = 0; i < numDonors; i++) {
...
@@ -4348,7 +4238,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
...
@@ -4348,7 +4238,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
for (int j = 0; j < (int) parameters.size(); j++)
for (int j = 0; j < (int) parameters.size(); j++)
donorParamVector[i][j] = (float) parameters[j];
donorParamVector[i][j] = (float) parameters[j];
}
}
donors
->
upload(donorVector);
donors
.
upload(donorVector);
donorParams->setParameterValues(donorParamVector);
donorParams->setParameterValues(donorParamVector);
vector<vector<float> > acceptorParamVector(numAcceptors);
vector<vector<float> > acceptorParamVector(numAcceptors);
vector<int4> acceptorVector(numAcceptors);
vector<int4> acceptorVector(numAcceptors);
...
@@ -4359,7 +4249,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
...
@@ -4359,7 +4249,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
for (int j = 0; j < (int) parameters.size(); j++)
for (int j = 0; j < (int) parameters.size(); j++)
acceptorParamVector[i][j] = (float) parameters[j];
acceptorParamVector[i][j] = (float) parameters[j];
}
}
acceptors
->
upload(acceptorVector);
acceptors
.
upload(acceptorVector);
acceptorParams->setParameterValues(acceptorParamVector);
acceptorParams->setParameterValues(acceptorParamVector);
info = new ForceInfo(force);
info = new ForceInfo(force);
cu.addForce(info);
cu.addForce(info);
...
@@ -4395,10 +4285,10 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
...
@@ -4395,10 +4285,10 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
else
else
throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
}
}
donorExclusions
= CudaArray::creat
e<int4>(cu, numDonors, "customHbondDonorExclusions");
donorExclusions
.initializ
e<int4>(cu, numDonors, "customHbondDonorExclusions");
acceptorExclusions
= CudaArray::creat
e<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
acceptorExclusions
.initializ
e<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions
->
upload(donorExclusionVector);
donorExclusions
.
upload(donorExclusionVector);
acceptorExclusions
->
upload(acceptorExclusionVector);
acceptorExclusions
.
upload(acceptorExclusionVector);
// Record the tabulated functions.
// Record the tabulated functions.
...
@@ -4406,6 +4296,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
...
@@ -4406,6 +4296,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
...
@@ -4414,8 +4305,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
...
@@ -4414,8 +4305,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions[
i].
upload(f);
tableArgs << ", const float";
tableArgs << ", const float";
if (width > 1)
if (width > 1)
tableArgs << width;
tableArgs << width;
...
@@ -4430,8 +4321,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
...
@@ -4430,8 +4321,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
}
if (globals
!= NULL
)
if (globals
.isInitialized()
)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
map<string, string> variables;
map<string, string> variables;
for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
const string& name = force.getPerDonorParameterName(i);
const string& name = force.getPerDonorParameterName(i);
...
@@ -4623,7 +4514,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
...
@@ -4623,7 +4514,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numDonors == 0 || numAcceptors == 0)
if (numDonors == 0 || numAcceptors == 0)
return 0.0;
return 0.0;
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -4632,7 +4523,7 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
...
@@ -4632,7 +4523,7 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
if (!hasInitializedKernel) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
hasInitializedKernel = true;
...
@@ -4640,42 +4531,42 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
...
@@ -4640,42 +4531,42 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
donorArgs.push_back(&cu.getForce().getDevicePointer());
donorArgs.push_back(&cu.getForce().getDevicePointer());
donorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
donorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
donorArgs.push_back(&cu.getPosq().getDevicePointer());
donorArgs.push_back(&cu.getPosq().getDevicePointer());
donorArgs.push_back(&donorExclusions
->
getDevicePointer());
donorArgs.push_back(&donorExclusions
.
getDevicePointer());
donorArgs.push_back(&donors
->
getDevicePointer());
donorArgs.push_back(&donors
.
getDevicePointer());
donorArgs.push_back(&acceptors
->
getDevicePointer());
donorArgs.push_back(&acceptors
.
getDevicePointer());
donorArgs.push_back(cu.getPeriodicBoxSizePointer());
donorArgs.push_back(cu.getPeriodicBoxSizePointer());
donorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
donorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
donorArgs.push_back(cu.getPeriodicBoxVecXPointer());
donorArgs.push_back(cu.getPeriodicBoxVecXPointer());
donorArgs.push_back(cu.getPeriodicBoxVecYPointer());
donorArgs.push_back(cu.getPeriodicBoxVecYPointer());
donorArgs.push_back(cu.getPeriodicBoxVecZPointer());
donorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
donorArgs.push_back(&globals
->
getDevicePointer());
donorArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : donorParams->getBuffers())
for (auto& buffer : donorParams->getBuffers())
donorArgs.push_back(&buffer.getMemory());
donorArgs.push_back(&buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
for (auto& buffer : acceptorParams->getBuffers())
donorArgs.push_back(&buffer.getMemory());
donorArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
donorArgs.push_back(&function
->
getDevicePointer());
donorArgs.push_back(&function
.
getDevicePointer());
index = 0;
index = 0;
acceptorArgs.push_back(&cu.getForce().getDevicePointer());
acceptorArgs.push_back(&cu.getForce().getDevicePointer());
acceptorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
acceptorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
acceptorArgs.push_back(&cu.getPosq().getDevicePointer());
acceptorArgs.push_back(&cu.getPosq().getDevicePointer());
acceptorArgs.push_back(&acceptorExclusions
->
getDevicePointer());
acceptorArgs.push_back(&acceptorExclusions
.
getDevicePointer());
acceptorArgs.push_back(&donors
->
getDevicePointer());
acceptorArgs.push_back(&donors
.
getDevicePointer());
acceptorArgs.push_back(&acceptors
->
getDevicePointer());
acceptorArgs.push_back(&acceptors
.
getDevicePointer());
acceptorArgs.push_back(cu.getPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecXPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecXPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecYPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecYPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecZPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals
!= NULL
)
if (globals
.isInitialized()
)
acceptorArgs.push_back(&globals
->
getDevicePointer());
acceptorArgs.push_back(&globals
.
getDevicePointer());
for (auto& buffer : donorParams->getBuffers())
for (auto& buffer : donorParams->getBuffers())
acceptorArgs.push_back(&buffer.getMemory());
acceptorArgs.push_back(&buffer.getMemory());
for (auto& buffer : acceptorParams->getBuffers())
for (auto& buffer : acceptorParams->getBuffers())
acceptorArgs.push_back(&buffer.getMemory());
acceptorArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
acceptorArgs.push_back(&function
->
getDevicePointer());
acceptorArgs.push_back(&function
.
getDevicePointer());
}
}
int sharedMemorySize = 3*CudaContext::ThreadBlockSize*sizeof(float4);
int sharedMemorySize = 3*CudaContext::ThreadBlockSize*sizeof(float4);
cu.executeKernel(donorKernel, &donorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
cu.executeKernel(donorKernel, &donorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
...
@@ -4775,22 +4666,6 @@ CudaCalcCustomCentroidBondForceKernel::~CudaCalcCustomCentroidBondForceKernel()
...
@@ -4775,22 +4666,6 @@ CudaCalcCustomCentroidBondForceKernel::~CudaCalcCustomCentroidBondForceKernel()
cu.setAsCurrent();
cu.setAsCurrent();
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (auto function : tabulatedFunctions)
delete function;
}
}
void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
...
@@ -4827,22 +4702,22 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -4827,22 +4702,22 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
for (int j = 0; j < normalizedWeights[i].size(); j++)
for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
}
}
groupParticles
= CudaArray::creat
e<int>(cu, groupParticleVec.size(), "groupParticles");
groupParticles
.initializ
e<int>(cu, groupParticleVec.size(), "groupParticles");
groupParticles
->
upload(groupParticleVec);
groupParticles
.
upload(groupParticleVec);
if (cu.getUseDoublePrecision()) {
if (cu.getUseDoublePrecision()) {
groupWeights
= CudaArray::creat
e<double>(cu, groupParticleVec.size(), "groupWeights");
groupWeights
.initializ
e<double>(cu, groupParticleVec.size(), "groupWeights");
groupWeights
->
upload(groupWeightVecDouble);
groupWeights
.
upload(groupWeightVecDouble);
centerPositions
= CudaArray::creat
e<double4>(cu, numGroups, "centerPositions");
centerPositions
.initializ
e<double4>(cu, numGroups, "centerPositions");
}
}
else {
else {
groupWeights
= CudaArray::creat
e<float>(cu, groupParticleVec.size(), "groupWeights");
groupWeights
.initializ
e<float>(cu, groupParticleVec.size(), "groupWeights");
groupWeights
->
upload(groupWeightVecFloat);
groupWeights
.
upload(groupWeightVecFloat);
centerPositions
= CudaArray::creat
e<float4>(cu, numGroups, "centerPositions");
centerPositions
.initializ
e<float4>(cu, numGroups, "centerPositions");
}
}
groupOffsets
= CudaArray::creat
e<int>(cu, groupOffsetVec.size(), "groupOffsets");
groupOffsets
.initializ
e<int>(cu, groupOffsetVec.size(), "groupOffsets");
groupOffsets
->
upload(groupOffsetVec);
groupOffsets
.
upload(groupOffsetVec);
groupForces
= CudaArray::creat
e<long long>(cu, numGroups*3, "groupForces");
groupForces
.initializ
e<long long>(cu, numGroups*3, "groupForces");
cu.addAutoclearBuffer(
*
groupForces);
cu.addAutoclearBuffer(groupForces);
// Record the bonds.
// Record the bonds.
...
@@ -4861,15 +4736,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -4861,15 +4736,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
paramVector[i][j] = (float) parameters[j];
paramVector[i][j] = (float) parameters[j];
}
}
params->setParameterValues(paramVector);
params->setParameterValues(paramVector);
bondGroups
= CudaArray::creat
e<int>(cu, bondGroupVec.size(), "bondGroups");
bondGroups
.initializ
e<int>(cu, bondGroupVec.size(), "bondGroups");
bondGroups
->
upload(bondGroupVec);
bondGroups
.
upload(bondGroupVec);
// Record the arguments to the force kernel.
// Record the arguments to the force kernel.
groupForcesArgs.push_back(&groupForces
->
getDevicePointer());
groupForcesArgs.push_back(&groupForces
.
getDevicePointer());
groupForcesArgs.push_back(NULL); // Energy buffer hasn't been created yet
groupForcesArgs.push_back(NULL); // Energy buffer hasn't been created yet
groupForcesArgs.push_back(¢erPositions
->
getDevicePointer());
groupForcesArgs.push_back(¢erPositions
.
getDevicePointer());
groupForcesArgs.push_back(&bondGroups
->
getDevicePointer());
groupForcesArgs.push_back(&bondGroups
.
getDevicePointer());
groupForcesArgs.push_back(cu.getPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getInvPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getInvPeriodicBoxSizePointer());
groupForcesArgs.push_back(cu.getPeriodicBoxVecXPointer());
groupForcesArgs.push_back(cu.getPeriodicBoxVecXPointer());
...
@@ -4885,6 +4760,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -4885,6 +4760,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
stringstream extraArgs;
stringstream extraArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
...
@@ -4893,13 +4769,13 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -4893,13 +4769,13 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions
.back()->
upload(f);
tabulatedFunctions
[i].
upload(f);
extraArgs << ", const float";
extraArgs << ", const float";
if (width > 1)
if (width > 1)
extraArgs << width;
extraArgs << width;
extraArgs << "* __restrict__ " << arrayName;
extraArgs << "* __restrict__ " << arrayName;
groupForcesArgs.push_back(&tabulatedFunctions
.back()->
getDevicePointer());
groupForcesArgs.push_back(&tabulatedFunctions
[i].
getDevicePointer());
}
}
// Record information about parameters.
// Record information about parameters.
...
@@ -4924,15 +4800,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -4924,15 +4800,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
if (needEnergyParamDerivs)
if (needEnergyParamDerivs)
extraArgs << ", mixed* __restrict__ energyParamDerivs";
extraArgs << ", mixed* __restrict__ energyParamDerivs";
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
extraArgs << ", const float* __restrict__ globals";
extraArgs << ", const float* __restrict__ globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
string value = "globals["+cu.intToString(i)+"]";
variables[name] = value;
variables[name] = value;
}
}
groupForcesArgs.push_back(&globals
->
getDevicePointer());
groupForcesArgs.push_back(&globals
.
getDevicePointer());
}
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// Now to generate the kernel. First, it needs to calculate all distances, angles,
...
@@ -5144,7 +5020,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -5144,7 +5020,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
if (numBonds == 0)
return 0.0;
return 0.0;
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -5153,17 +5029,17 @@ double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool
...
@@ -5153,17 +5029,17 @@ double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles
->
getDevicePointer(), &groupWeights
->
getDevicePointer(),
void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles
.
getDevicePointer(), &groupWeights
.
getDevicePointer(),
&groupOffsets
->
getDevicePointer(), ¢erPositions
->
getDevicePointer()};
&groupOffsets
.
getDevicePointer(), ¢erPositions
.
getDevicePointer()};
cu.executeKernel(computeCentersKernel, computeCentersArgs, CudaContext::TileSize*numGroups);
cu.executeKernel(computeCentersKernel, computeCentersArgs, CudaContext::TileSize*numGroups);
groupForcesArgs[1] = &cu.getEnergyBuffer().getDevicePointer();
groupForcesArgs[1] = &cu.getEnergyBuffer().getDevicePointer();
if (needEnergyParamDerivs)
if (needEnergyParamDerivs)
groupForcesArgs[9] = &cu.getEnergyParamDerivBuffer().getDevicePointer();
groupForcesArgs[9] = &cu.getEnergyParamDerivBuffer().getDevicePointer();
cu.executeKernel(groupForcesKernel, &groupForcesArgs[0], numBonds);
cu.executeKernel(groupForcesKernel, &groupForcesArgs[0], numBonds);
void* applyForcesArgs[] = {&groupParticles
->
getDevicePointer(), &groupWeights
->
getDevicePointer(), &groupOffsets
->
getDevicePointer(),
void* applyForcesArgs[] = {&groupParticles
.
getDevicePointer(), &groupWeights
.
getDevicePointer(), &groupOffsets
.
getDevicePointer(),
&groupForces
->
getDevicePointer(), &cu.getForce().getDevicePointer()};
&groupForces
.
getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(applyForcesKernel, applyForcesArgs, CudaContext::TileSize*numGroups);
cu.executeKernel(applyForcesKernel, applyForcesArgs, CudaContext::TileSize*numGroups);
return 0.0;
return 0.0;
}
}
...
@@ -5222,10 +5098,6 @@ CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel()
...
@@ -5222,10 +5098,6 @@ CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel()
cu.setAsCurrent();
cu.setAsCurrent();
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (globals != NULL)
delete globals;
for (auto function : tabulatedFunctions)
delete function;
}
}
void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
...
@@ -5256,16 +5128,16 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
...
@@ -5256,16 +5128,16 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
map<string, Lepton::CustomFunction*> functions;
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
CudaArray* array = CudaArray::create<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions.push_back(array);
tabulatedFunctions[i].upload(f);
array->upload(f);
string arrayName = cu.getBondedUtilities().addArgument(tabulatedFunctions[i].getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
string arrayName = cu.getBondedUtilities().addArgument(array->getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
functionDefinitions.push_back(make_pair(name, arrayName));
functionDefinitions.push_back(make_pair(name, arrayName));
}
}
...
@@ -5289,9 +5161,9 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
...
@@ -5289,9 +5161,9 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
variables[name] = "bondParams"+params->getParameterSuffix(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
}
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0) {
globals
= CudaArray::creat
e<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
globals
.initializ
e<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals
->
getDevicePointer(), "float");
string argName = cu.getBondedUtilities().addArgument(globals
.
getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
string value = argName+"["+cu.intToString(i)+"]";
...
@@ -5474,7 +5346,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
...
@@ -5474,7 +5346,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
}
}
double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals
!= NULL
) {
if (globals
.isInitialized()
) {
bool changed = false;
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
float value = (float) context.getParameter(globalParamNames[i]);
...
@@ -5483,7 +5355,7 @@ double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool
...
@@ -5483,7 +5355,7 @@ double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool
globalParamValues[i] = value;
globalParamValues[i] = value;
}
}
if (changed)
if (changed)
globals
->
upload(globalParamValues);
globals
.
upload(globalParamValues);
}
}
return 0.0;
return 0.0;
}
}
...
@@ -5553,32 +5425,6 @@ CudaCalcCustomManyParticleForceKernel::~CudaCalcCustomManyParticleForceKernel()
...
@@ -5553,32 +5425,6 @@ CudaCalcCustomManyParticleForceKernel::~CudaCalcCustomManyParticleForceKernel()
cu.setAsCurrent();
cu.setAsCurrent();
if (params != NULL)
if (params != NULL)
delete params;
delete params;
if (orderIndex != NULL)
delete orderIndex;
if (particleOrder != NULL)
delete particleOrder;
if (particleTypes != NULL)
delete particleTypes;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighborPairs != NULL)
delete neighborPairs;
if (numNeighborPairs != NULL)
delete numNeighborPairs;
if (neighborStartIndex != NULL)
delete neighborStartIndex;
if (neighbors != NULL)
delete neighbors;
if (numNeighborsForAtom != NULL)
delete numNeighborsForAtom;
for (auto function : tabulatedFunctions)
delete function;
}
}
void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
...
@@ -5612,6 +5458,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
...
@@ -5612,6 +5458,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
vector<pair<string, string> > functionDefinitions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string name = force.getTabulatedFunctionName(i);
...
@@ -5620,8 +5467,8 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
...
@@ -5620,8 +5467,8 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions
.push_back(CudaArray::creat
e<float>(cu, f.size(), "TabulatedFunction")
)
;
tabulatedFunctions
[i].initializ
e<float>(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[
tabulatedFunctions.size()-1]->
upload(f);
tabulatedFunctions[
i].
upload(f);
tableArgs << ", const float";
tableArgs << ", const float";
if (width > 1)
if (width > 1)
tableArgs << width;
tableArgs << width;
...
@@ -5667,16 +5514,16 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
...
@@ -5667,16 +5514,16 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
bool hasTypeFilters = (particleOrderVec.size() > 1);
bool hasTypeFilters = (particleOrderVec.size() > 1);
if (hasTypeFilters) {
if (hasTypeFilters) {
particleTypes
= CudaArray::creat
e<int>(cu, particleTypesVec.size(), "customManyParticleTypes");
particleTypes
.initializ
e<int>(cu, particleTypesVec.size(), "customManyParticleTypes");
orderIndex
= CudaArray::creat
e<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex");
orderIndex
.initializ
e<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex");
particleOrder
= CudaArray::creat
e<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleOrder
.initializ
e<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
particleTypes
->
upload(particleTypesVec);
particleTypes
.
upload(particleTypesVec);
orderIndex
->
upload(orderIndexVec);
orderIndex
.
upload(orderIndexVec);
vector<int> flattenedOrder(particleOrder
->
getSize());
vector<int> flattenedOrder(particleOrder
.
getSize());
for (int i = 0; i < (int) particleOrderVec.size(); i++)
for (int i = 0; i < (int) particleOrderVec.size(); i++)
for (int j = 0; j < particlesPerSet; j++)
for (int j = 0; j < particlesPerSet; j++)
flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
particleOrder
->
upload(flattenedOrder);
particleOrder
.
upload(flattenedOrder);
}
}
// Build data structures for exclusions.
// Build data structures for exclusions.
...
@@ -5697,10 +5544,10 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
...
@@ -5697,10 +5544,10 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
exclusionStartIndexVec[i+1] = exclusionsVec.size();
exclusionStartIndexVec[i+1] = exclusionsVec.size();
}
}
exclusions
= CudaArray::creat
e<int>(cu, exclusionsVec.size(), "customManyParticleExclusions");
exclusions
.initializ
e<int>(cu, exclusionsVec.size(), "customManyParticleExclusions");
exclusionStartIndex
= CudaArray::creat
e<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusionStartIndex
.initializ
e<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
exclusions
->
upload(exclusionsVec);
exclusions
.
upload(exclusionsVec);
exclusionStartIndex
->
upload(exclusionStartIndexVec);
exclusionStartIndex
.
upload(exclusionStartIndexVec);
}
}
// Build data structures for the neighbor list.
// Build data structures for the neighbor list.
...
@@ -5708,19 +5555,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
...
@@ -5708,19 +5555,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
if (nonbondedMethod != NoCutoff) {
if (nonbondedMethod != NoCutoff) {
int numAtomBlocks = cu.getNumAtomBlocks();
int numAtomBlocks = cu.getNumAtomBlocks();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
blockCenter
= new CudaArray
(cu, numAtomBlocks, 4*elementSize, "blockCenter");
blockCenter
.initialize
(cu, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox
= new CudaArray
(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox");
blockBoundingBox
.initialize
(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox");
numNeighborPairs
= CudaArray::creat
e<int>(cu, 1, "customManyParticleNumNeighborPairs");
numNeighborPairs
.initializ
e<int>(cu, 1, "customManyParticleNumNeighborPairs");
neighborStartIndex
= CudaArray::creat
e<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex");
neighborStartIndex
.initializ
e<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex");
numNeighborsForAtom
= CudaArray::creat
e<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom");
numNeighborsForAtom
.initializ
e<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom");
CHECK_RESULT(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "Error creating event for CustomManyParticleForce");
CHECK_RESULT(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "Error creating event for CustomManyParticleForce");
// Select a size for the array that holds the neighbor list. We have to make a fairly
// Select a size for the array that holds the neighbor list. We have to make a fairly
// arbitrary guess, but if this turns out to be too small we'll increase it later.
// arbitrary guess, but if this turns out to be too small we'll increase it later.
maxNeighborPairs = 150*numParticles;
maxNeighborPairs = 150*numParticles;
neighborPairs
= CudaArray::creat
e<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
neighborPairs
.initializ
e<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors
= CudaArray::creat
e<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
neighbors
.initializ
e<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
}
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// Now to generate the kernel. First, it needs to calculate all distances, angles,
...
@@ -6045,22 +5892,22 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
...
@@ -6045,22 +5892,22 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (nonbondedMethod != NoCutoff) {
if (nonbondedMethod != NoCutoff) {
forceArgs.push_back(&neighbors
->
getDevicePointer());
forceArgs.push_back(&neighbors
.
getDevicePointer());
forceArgs.push_back(&neighborStartIndex
->
getDevicePointer());
forceArgs.push_back(&neighborStartIndex
.
getDevicePointer());
}
}
if (particleTypes
!= NULL
) {
if (particleTypes
.isInitialized()
) {
forceArgs.push_back(&particleTypes
->
getDevicePointer());
forceArgs.push_back(&particleTypes
.
getDevicePointer());
forceArgs.push_back(&orderIndex
->
getDevicePointer());
forceArgs.push_back(&orderIndex
.
getDevicePointer());
forceArgs.push_back(&particleOrder
->
getDevicePointer());
forceArgs.push_back(&particleOrder
.
getDevicePointer());
}
}
if (exclusions
!= NULL
) {
if (exclusions
.isInitialized()
) {
forceArgs.push_back(&exclusions
->
getDevicePointer());
forceArgs.push_back(&exclusions
.
getDevicePointer());
forceArgs.push_back(&exclusionStartIndex
->
getDevicePointer());
forceArgs.push_back(&exclusionStartIndex
.
getDevicePointer());
}
}
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
forceArgs.push_back(&buffer.getMemory());
forceArgs.push_back(&buffer.getMemory());
for (auto function : tabulatedFunctions)
for (auto
&
function : tabulatedFunctions)
forceArgs.push_back(&function
->
getDevicePointer());
forceArgs.push_back(&function
.
getDevicePointer());
if (nonbondedMethod != NoCutoff) {
if (nonbondedMethod != NoCutoff) {
// Set arguments for the block bounds kernel.
// Set arguments for the block bounds kernel.
...
@@ -6071,9 +5918,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
...
@@ -6071,9 +5918,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
blockBoundsArgs.push_back(cu.getPeriodicBoxVecYPointer());
blockBoundsArgs.push_back(cu.getPeriodicBoxVecYPointer());
blockBoundsArgs.push_back(cu.getPeriodicBoxVecZPointer());
blockBoundsArgs.push_back(cu.getPeriodicBoxVecZPointer());
blockBoundsArgs.push_back(&cu.getPosq().getDevicePointer());
blockBoundsArgs.push_back(&cu.getPosq().getDevicePointer());
blockBoundsArgs.push_back(&blockCenter
->
getDevicePointer());
blockBoundsArgs.push_back(&blockCenter
.
getDevicePointer());
blockBoundsArgs.push_back(&blockBoundingBox
->
getDevicePointer());
blockBoundsArgs.push_back(&blockBoundingBox
.
getDevicePointer());
blockBoundsArgs.push_back(&numNeighborPairs
->
getDevicePointer());
blockBoundsArgs.push_back(&numNeighborPairs
.
getDevicePointer());
// Set arguments for the neighbor list kernel.
// Set arguments for the neighbor list kernel.
...
@@ -6083,32 +5930,32 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
...
@@ -6083,32 +5930,32 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
neighborsArgs.push_back(cu.getPeriodicBoxVecYPointer());
neighborsArgs.push_back(cu.getPeriodicBoxVecYPointer());
neighborsArgs.push_back(cu.getPeriodicBoxVecZPointer());
neighborsArgs.push_back(cu.getPeriodicBoxVecZPointer());
neighborsArgs.push_back(&cu.getPosq().getDevicePointer());
neighborsArgs.push_back(&cu.getPosq().getDevicePointer());
neighborsArgs.push_back(&blockCenter
->
getDevicePointer());
neighborsArgs.push_back(&blockCenter
.
getDevicePointer());
neighborsArgs.push_back(&blockBoundingBox
->
getDevicePointer());
neighborsArgs.push_back(&blockBoundingBox
.
getDevicePointer());
neighborsArgs.push_back(&neighborPairs
->
getDevicePointer());
neighborsArgs.push_back(&neighborPairs
.
getDevicePointer());
neighborsArgs.push_back(&numNeighborPairs
->
getDevicePointer());
neighborsArgs.push_back(&numNeighborPairs
.
getDevicePointer());
neighborsArgs.push_back(&numNeighborsForAtom
->
getDevicePointer());
neighborsArgs.push_back(&numNeighborsForAtom
.
getDevicePointer());
neighborsArgs.push_back(&maxNeighborPairs);
neighborsArgs.push_back(&maxNeighborPairs);
if (exclusions
!= NULL
) {
if (exclusions
.isInitialized()
) {
neighborsArgs.push_back(&exclusions
->
getDevicePointer());
neighborsArgs.push_back(&exclusions
.
getDevicePointer());
neighborsArgs.push_back(&exclusionStartIndex
->
getDevicePointer());
neighborsArgs.push_back(&exclusionStartIndex
.
getDevicePointer());
}
}
// Set arguments for the kernel to find neighbor list start indices.
// Set arguments for the kernel to find neighbor list start indices.
startIndicesArgs.push_back(&numNeighborsForAtom
->
getDevicePointer());
startIndicesArgs.push_back(&numNeighborsForAtom
.
getDevicePointer());
startIndicesArgs.push_back(&neighborStartIndex
->
getDevicePointer());
startIndicesArgs.push_back(&neighborStartIndex
.
getDevicePointer());
startIndicesArgs.push_back(&numNeighborPairs
->
getDevicePointer());
startIndicesArgs.push_back(&numNeighborPairs
.
getDevicePointer());
startIndicesArgs.push_back(&maxNeighborPairs);
startIndicesArgs.push_back(&maxNeighborPairs);
// Set arguments for the kernel to assemble the final neighbor list.
// Set arguments for the kernel to assemble the final neighbor list.
copyPairsArgs.push_back(&neighborPairs
->
getDevicePointer());
copyPairsArgs.push_back(&neighborPairs
.
getDevicePointer());
copyPairsArgs.push_back(&neighbors
->
getDevicePointer());
copyPairsArgs.push_back(&neighbors
.
getDevicePointer());
copyPairsArgs.push_back(&numNeighborPairs
->
getDevicePointer());
copyPairsArgs.push_back(&numNeighborPairs
.
getDevicePointer());
copyPairsArgs.push_back(&maxNeighborPairs);
copyPairsArgs.push_back(&maxNeighborPairs);
copyPairsArgs.push_back(&numNeighborsForAtom
->
getDevicePointer());
copyPairsArgs.push_back(&numNeighborsForAtom
.
getDevicePointer());
copyPairsArgs.push_back(&neighborStartIndex
->
getDevicePointer());
copyPairsArgs.push_back(&neighborStartIndex
.
getDevicePointer());
}
}
}
}
if (globalParamValues.size() > 0) {
if (globalParamValues.size() > 0) {
...
@@ -6131,7 +5978,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
...
@@ -6131,7 +5978,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
// We need to make sure there was enough memory for the neighbor list. Download the
// We need to make sure there was enough memory for the neighbor list. Download the
// information asynchronously so kernels can be running at the same time.
// information asynchronously so kernels can be running at the same time.
numNeighborPairs
->
download(numPairs, false);
numNeighborPairs
.
download(numPairs, false);
CHECK_RESULT(cuEventRecord(event, 0), "Error recording event for CustomManyParticleForce");
CHECK_RESULT(cuEventRecord(event, 0), "Error recording event for CustomManyParticleForce");
cu.executeKernel(startIndicesKernel, &startIndicesArgs[0], 256, 256, 256*sizeof(int));
cu.executeKernel(startIndicesKernel, &startIndicesArgs[0], 256, 256, 256*sizeof(int));
cu.executeKernel(copyPairsKernel, ©PairsArgs[0], maxNeighborPairs);
cu.executeKernel(copyPairsKernel, ©PairsArgs[0], maxNeighborPairs);
...
@@ -6145,17 +5992,13 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
...
@@ -6145,17 +5992,13 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
if (*numPairs > maxNeighborPairs) {
if (*numPairs > maxNeighborPairs) {
// Resize the arrays and run the calculation again.
// Resize the arrays and run the calculation again.
delete neighborPairs;
neighborPairs = NULL;
delete neighbors;
neighbors = NULL;
maxNeighborPairs = (int) (1.1*(*numPairs));
maxNeighborPairs = (int) (1.1*(*numPairs));
neighborPairs
= CudaArray::create<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs"
);
neighborPairs
.resize(maxNeighborPairs
);
neighbors
= CudaArray::create<int>(cu, maxNeighborPairs, "customManyParticleNeighbors"
);
neighbors
.resize(maxNeighborPairs
);
forceArgs[5] = &neighbors
->
getDevicePointer();
forceArgs[5] = &neighbors
.
getDevicePointer();
neighborsArgs[5] = &neighborPairs
->
getDevicePointer();
neighborsArgs[5] = &neighborPairs
.
getDevicePointer();
copyPairsArgs[0] = &neighborPairs
->
getDevicePointer();
copyPairsArgs[0] = &neighborPairs
.
getDevicePointer();
copyPairsArgs[1] = &neighbors
->
getDevicePointer();
copyPairsArgs[1] = &neighbors
.
getDevicePointer();
continue;
continue;
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment