Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
19d2885a
Commit
19d2885a
authored
Jan 23, 2014
by
Lee-Ping
Browse files
Merge github.com:SimTk/openmm
parents
99ef4344
57a6768e
Changes
310
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
251 additions
and
75 deletions
+251
-75
platforms/cpu/tests/TestCpuSettle.cpp
platforms/cpu/tests/TestCpuSettle.cpp
+120
-0
platforms/cuda/CMakeLists.txt
platforms/cuda/CMakeLists.txt
+2
-6
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+5
-1
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+2
-1
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+8
-1
platforms/cuda/sharedTarget/CMakeLists.txt
platforms/cuda/sharedTarget/CMakeLists.txt
+6
-1
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+25
-16
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+25
-9
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+24
-9
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+4
-4
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+4
-4
platforms/cuda/src/kernels/customNonbondedGroups.cu
platforms/cuda/src/kernels/customNonbondedGroups.cu
+1
-1
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+8
-8
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+4
-4
platforms/cuda/tests/CMakeLists.txt
platforms/cuda/tests/CMakeLists.txt
+5
-0
platforms/cuda/tests/TestCudaCustomGBForce.cpp
platforms/cuda/tests/TestCudaCustomGBForce.cpp
+1
-1
platforms/cuda/tests/TestCudaGBSAOBCForce.cpp
platforms/cuda/tests/TestCudaGBSAOBCForce.cpp
+1
-1
platforms/cuda/tests/TestCudaRandom.cpp
platforms/cuda/tests/TestCudaRandom.cpp
+2
-1
platforms/cuda/tests/TestCudaSort.cpp
platforms/cuda/tests/TestCudaSort.cpp
+2
-1
platforms/opencl/CMakeLists.txt
platforms/opencl/CMakeLists.txt
+2
-6
No files found.
platforms/cpu/tests/TestCpuSettle.cpp
0 → 100644
View file @
19d2885a
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CPU implementation of the SETTLE algorithm.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CpuPlatform.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
void
testConstraints
()
{
const
int
numMolecules
=
10
;
const
int
numParticles
=
numMolecules
*
3
;
const
int
numConstraints
=
numMolecules
*
3
;
const
double
temp
=
100.0
;
CpuPlatform
platform
;
System
system
;
LangevinIntegrator
integrator
(
temp
,
2.0
,
0.001
);
integrator
.
setConstraintTolerance
(
1e-5
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numMolecules
;
++
i
)
{
system
.
addParticle
(
16.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
forceField
->
addParticle
(
-
0.82
,
0.317
,
0.65
);
forceField
->
addParticle
(
0.41
,
1.0
,
0.0
);
forceField
->
addParticle
(
0.41
,
1.0
,
0.0
);
system
.
addConstraint
(
i
*
3
,
i
*
3
+
1
,
0.1
);
system
.
addConstraint
(
i
*
3
,
i
*
3
+
2
,
0.1
);
system
.
addConstraint
(
i
*
3
+
1
,
i
*
3
+
2
,
0.163
);
}
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numMolecules
;
++
i
)
{
positions
[
i
*
3
]
=
Vec3
((
i
%
4
)
*
0.4
,
(
i
/
4
)
*
0.4
,
0
);
positions
[
i
*
3
+
1
]
=
positions
[
i
*
3
]
+
Vec3
(
0.1
,
0
,
0
);
positions
[
i
*
3
+
2
]
=
positions
[
i
*
3
]
+
Vec3
(
-
0.03333
,
0.09428
,
0
);
velocities
[
i
*
3
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
velocities
[
i
*
3
+
1
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
velocities
[
i
*
3
+
2
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
}
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
// Simulate it and see whether the constraints remain satisfied.
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
integrator
.
step
(
1
);
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Forces
);
for
(
int
j
=
0
;
j
<
numConstraints
;
++
j
)
{
int
particle1
,
particle2
;
double
distance
;
system
.
getConstraintParameters
(
j
,
particle1
,
particle2
,
distance
);
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
1e-5
);
}
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
!
CpuPlatform
::
isProcessorSupported
())
{
cout
<<
"CPU is not supported. Exiting."
<<
endl
;
return
0
;
}
testConstraints
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
platforms/cuda/CMakeLists.txt
View file @
19d2885a
...
...
@@ -14,10 +14,6 @@
# libOpenMMCUDA_static[_d].a
#----------------------------------------------------
IF
(
APPLE
)
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
"10.6"
)
ENDIF
(
APPLE
)
set
(
OPENMM_BUILD_CUDA_TESTS TRUE CACHE BOOL
"Whether to build CUDA test cases"
)
if
(
OPENMM_BUILD_CUDA_TESTS
)
SUBDIRS
(
tests
)
...
...
@@ -39,9 +35,9 @@ SET(STATIC_TARGET ${OPENMMCUDA_LIBRARY_NAME}_static)
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF
(
${
CMAKE_GENERATOR
}
MATCHES
"Visual Studio"
)
IF
(
MSVC
)
SET
(
CMAKE_DEBUG_POSTFIX
"_d"
CACHE INTERNAL
""
FORCE
)
ENDIF
(
${
CMAKE_GENERATOR
}
MATCHES
"Visual Studio"
)
ENDIF
(
MSVC
)
# But on Unix or Cygwin we have to add the suffix manually
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
...
...
platforms/cuda/include/CudaContext.h
View file @
19d2885a
...
...
@@ -75,7 +75,7 @@ public:
static
const
int
ThreadBlockSize
;
static
const
int
TileSize
;
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
std
::
string
&
precision
,
const
std
::
string
&
compiler
,
const
std
::
string
&
tempDir
,
CudaPlatform
::
PlatformData
&
platformData
);
const
std
::
string
&
compiler
,
const
std
::
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
);
~
CudaContext
();
/**
* This is called to initialize internal data structures after all Forces in the system
...
...
@@ -623,6 +623,8 @@ public:
*/
class
CudaContext
::
ForcePreComputation
{
public:
virtual
~
ForcePreComputation
()
{
}
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
...
...
@@ -639,6 +641,8 @@ public:
*/
class
CudaContext
::
ForcePostComputation
{
public:
virtual
~
ForcePostComputation
()
{
}
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
...
...
platforms/cuda/include/CudaKernels.h
View file @
19d2885a
...
...
@@ -738,7 +738,7 @@ private:
class
CudaCalcCustomGBForceKernel
:
public
CalcCustomGBForceKernel
{
public:
CudaCalcCustomGBForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomGBForceKernel
(
name
,
platform
),
hasInitializedKernels
(
false
),
cu
(
cu
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
longEnergyDerivs
(
NULL
),
globals
(
NULL
),
hasInitializedKernels
(
false
),
cu
(
cu
),
params
(
NULL
),
computedValues
(
NULL
),
energyDerivs
(
NULL
),
energyDerivChain
(
NULL
),
longEnergyDerivs
(
NULL
),
globals
(
NULL
),
valueBuffers
(
NULL
),
tabulatedFunctionParams
(
NULL
),
system
(
system
)
{
}
~
CudaCalcCustomGBForceKernel
();
...
...
@@ -772,6 +772,7 @@ private:
CudaParameterSet
*
params
;
CudaParameterSet
*
computedValues
;
CudaParameterSet
*
energyDerivs
;
CudaParameterSet
*
energyDerivChain
;
CudaArray
*
longEnergyDerivs
;
CudaArray
*
globals
;
CudaArray
*
valueBuffers
;
...
...
platforms/cuda/include/CudaPlatform.h
View file @
19d2885a
...
...
@@ -95,6 +95,13 @@ public:
static
const
std
::
string
key
=
"CudaCompiler"
;
return
key
;
}
/**
* This is the name of the parameter for specifying the host compiler for the CUDA compiler to use.
*/
static
const
std
::
string
&
CudaHostCompiler
()
{
static
const
std
::
string
key
=
"CudaHostCompiler"
;
return
key
;
}
/**
* This is the name of the parameter for specifying the path to the directory for creating temporary files.
*/
...
...
@@ -107,7 +114,7 @@ public:
class
OPENMM_EXPORT_CUDA
CudaPlatform
::
PlatformData
{
public:
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
);
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
,
const
std
::
string
&
hostCompilerProperty
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
...
...
platforms/cuda/sharedTarget/CMakeLists.txt
View file @
19d2885a
...
...
@@ -19,6 +19,11 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
CUDA_CUDA_LIBRARY
}
${
CUDA_cufft_LIBRARY
}
${
PTHREADS_LIB
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"-DOPENMM_CUDA_BUILDING_SHARED_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_CUDA_BUILDING_SHARED_LIBRARY"
)
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
ELSE
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ENDIF
(
APPLE
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
platforms/cuda/src/CudaContext.cpp
View file @
19d2885a
...
...
@@ -72,9 +72,12 @@ const int CudaContext::TileSize = sizeof(tileflags)*8;
bool
CudaContext
::
hasInitializedCuda
=
false
;
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
compiler
(
compiler
),
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
if
(
hostCompiler
.
size
()
>
0
)
this
->
compiler
=
compiler
+
" --compiler-bindir "
+
hostCompiler
;
if
(
!
hasInitializedCuda
)
{
CHECK_RESULT2
(
cuInit
(
0
),
"Error initializing CUDA"
);
hasInitializedCuda
=
true
;
...
...
@@ -153,9 +156,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
));
int
numThreadBlocksPerComputeUnit
=
6
;
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
multiprocessors
;
bonded
=
new
CudaBondedUtilities
(
*
this
);
nonbonded
=
new
CudaNonbondedUtilities
(
*
this
);
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
posq
=
CudaArray
::
create
<
double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
create
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
...
...
@@ -166,9 +166,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"make_mixed2"
]
=
"make_double2"
;
compilationDefines
[
"make_mixed3"
]
=
"make_double3"
;
compilationDefines
[
"make_mixed4"
]
=
"make_double4"
;
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
else
if
(
useMixedPrecision
)
{
posq
=
CudaArray
::
create
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
...
...
@@ -181,9 +178,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"make_mixed2"
]
=
"make_double2"
;
compilationDefines
[
"make_mixed3"
]
=
"make_double3"
;
compilationDefines
[
"make_mixed4"
]
=
"make_double4"
;
energyBuffer
=
CudaArray
::
create
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
else
{
posq
=
CudaArray
::
create
<
float4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
...
...
@@ -194,9 +188,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"make_mixed2"
]
=
"make_float2"
;
compilationDefines
[
"make_mixed3"
]
=
"make_float3"
;
compilationDefines
[
"make_mixed4"
]
=
"make_float4"
;
energyBuffer
=
CudaArray
::
create
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
}
posCellOffsets
.
resize
(
paddedNumAtoms
,
make_int4
(
0
,
0
,
0
,
0
));
...
...
@@ -233,6 +224,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
// Create utilities objects.
bonded
=
new
CudaBondedUtilities
(
*
this
);
nonbonded
=
new
CudaNonbondedUtilities
(
*
this
);
integration
=
new
CudaIntegrationUtilities
(
*
this
,
system
);
expression
=
new
CudaExpressionUtilities
(
*
this
);
}
...
...
@@ -280,6 +273,22 @@ CudaContext::~CudaContext() {
void
CudaContext
::
initialize
()
{
cuCtxSetCurrent
(
context
);
string
errorMessage
=
"Error initializing Context"
;
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
else
if
(
useMixedPrecision
)
{
energyBuffer
=
CudaArray
::
create
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
else
{
energyBuffer
=
CudaArray
::
create
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
double
mass
=
system
.
getParticleMass
(
i
);
if
(
useDoublePrecision
||
useMixedPrecision
)
...
...
@@ -441,13 +450,13 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
out
.
close
();
#ifdef WIN32
#ifdef _DEBUG
string
command
=
"
\"
"
+
compiler
+
"
\"
--ptx -G -g --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o "
+
outputFile
+
" "
+
options
+
" "
+
inputFile
+
" 2> "
+
logFile
;
string
command
=
compiler
+
" --ptx -G -g --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o "
+
outputFile
+
" "
+
options
+
" "
+
inputFile
+
" 2> "
+
logFile
;
#else
string
command
=
"
\"
"
+
compiler
+
"
\"
--ptx -lineinfo --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o "
+
outputFile
+
" "
+
options
+
" "
+
inputFile
+
" 2> "
+
logFile
;
string
command
=
compiler
+
" --ptx -lineinfo --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o "
+
outputFile
+
" "
+
options
+
" "
+
inputFile
+
" 2> "
+
logFile
;
#endif
int
res
=
compileInWindows
(
command
);
#else
string
command
=
"
\"
"
+
compiler
+
"
\"
--ptx --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o
\"
"
+
outputFile
+
"
\"
"
+
options
+
"
\"
"
+
inputFile
+
"
\"
2>
\"
"
+
logFile
+
"
\"
"
;
string
command
=
compiler
+
" --ptx --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o
\"
"
+
outputFile
+
"
\"
"
+
options
+
"
\"
"
+
inputFile
+
"
\"
2>
\"
"
+
logFile
+
"
\"
"
;
int
res
=
std
::
system
(
command
.
c_str
());
#endif
try
{
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
19d2885a
...
...
@@ -1460,8 +1460,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int
numParticles
=
force
.
getNumParticles
();
sigmaEpsilon
=
CudaArray
::
create
<
float2
>
(
cu
,
cu
.
getPaddedNumAtoms
(),
"sigmaEpsilon"
);
CudaArray
&
posq
=
cu
.
getPosq
();
float4
*
posqf
=
(
float4
*
)
cu
.
getPinnedBuffer
();
double4
*
posqd
=
(
double4
*
)
cu
.
getPinnedBuffer
();
vector
<
double4
>
temp
(
posq
.
getSize
());
float4
*
posqf
=
(
float4
*
)
&
temp
[
0
];
double4
*
posqd
=
(
double4
*
)
&
temp
[
0
];
vector
<
float2
>
sigmaEpsilonVector
(
cu
.
getPaddedNumAtoms
(),
make_float2
(
0
,
0
));
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
double
sumSquaredCharges
=
0.0
;
...
...
@@ -1486,7 +1487,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
exclusionList
[
exclusions
[
i
].
first
].
push_back
(
exclusions
[
i
].
second
);
exclusionList
[
exclusions
[
i
].
second
].
push_back
(
exclusions
[
i
].
first
);
}
posq
.
upload
(
cu
.
getPinnedBuffer
()
);
posq
.
upload
(
&
temp
[
0
]
);
sigmaEpsilon
->
upload
(
sigmaEpsilonVector
);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
CutoffNonPeriodic
);
...
...
@@ -2410,8 +2411,9 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
cu
.
addAutoclearBuffer
(
*
bornSum
);
cu
.
addAutoclearBuffer
(
*
bornForce
);
CudaArray
&
posq
=
cu
.
getPosq
();
float4
*
posqf
=
(
float4
*
)
cu
.
getPinnedBuffer
();
double4
*
posqd
=
(
double4
*
)
cu
.
getPinnedBuffer
();
vector
<
double4
>
temp
(
posq
.
getSize
());
float4
*
posqf
=
(
float4
*
)
&
temp
[
0
];
double4
*
posqd
=
(
double4
*
)
&
temp
[
0
];
vector
<
float2
>
paramsVector
(
cu
.
getPaddedNumAtoms
(),
make_float2
(
1
,
1
));
const
double
dielectricOffset
=
0.009
;
for
(
int
i
=
0
;
i
<
force
.
getNumParticles
();
i
++
)
{
...
...
@@ -2424,7 +2426,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
else
posqf
[
i
]
=
make_float4
(
0
,
0
,
0
,
(
float
)
charge
);
}
posq
.
upload
(
cu
.
getPinnedBuffer
()
);
posq
.
upload
(
&
temp
[
0
]
);
params
->
upload
(
paramsVector
);
prefactor
=
-
ONE_4PI_EPS0
*
((
1.0
/
force
.
getSoluteDielectric
())
-
(
1.0
/
force
.
getSolventDielectric
()));
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
NoCutoff
);
...
...
@@ -2600,6 +2602,8 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
delete
computedValues
;
if
(
energyDerivs
!=
NULL
)
delete
energyDerivs
;
if
(
energyDerivChain
!=
NULL
)
delete
energyDerivChain
;
if
(
longEnergyDerivs
!=
NULL
)
delete
longEnergyDerivs
;
if
(
globals
!=
NULL
)
...
...
@@ -2743,6 +2747,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
}
longEnergyDerivs
=
CudaArray
::
create
<
long
long
>
(
cu
,
force
.
getNumComputedValues
()
*
cu
.
getPaddedNumAtoms
(),
"customGBLongEnergyDerivatives"
);
energyDerivs
=
new
CudaParameterSet
(
cu
,
force
.
getNumComputedValues
(),
cu
.
getPaddedNumAtoms
(),
"customGBEnergyDerivatives"
,
true
);
energyDerivChain
=
new
CudaParameterSet
(
cu
,
force
.
getNumComputedValues
(),
cu
.
getPaddedNumAtoms
(),
"customGBEnergyDerivativeChain"
,
true
);
// Create the kernels.
...
...
@@ -3009,6 +3014,11 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
extraArgs
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ derivBuffers"
<<
index
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivChain
->
getBuffers
().
size
();
i
++
)
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivChain
->
getBuffers
()[
i
];
string
index
=
cu
.
intToString
(
i
+
1
);
extraArgs
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ derivChain"
<<
index
;
}
extraArgs
<<
", const long long* __restrict__ derivBuffersIn"
;
for
(
int
i
=
0
;
i
<
energyDerivs
->
getNumParameters
();
++
i
)
load
<<
"derivBuffers"
<<
energyDerivs
->
getParameterSuffix
(
i
,
"[index]"
)
<<
...
...
@@ -3054,6 +3064,10 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
// Record values.
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
string
index
=
cu
.
intToString
(
i
+
1
);
compute
<<
"derivBuffers"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
}
compute
<<
"forceBuffers[index] += (long long) (force.x*0x100000000);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS] += (long long) (force.y*0x100000000);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS*2] += (long long) (force.z*0x100000000);
\n
"
;
...
...
@@ -3066,7 +3080,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
string
index
=
cu
.
intToString
(
i
+
1
);
compute
<<
"deriv
Buffers
"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
compute
<<
"deriv
Chain
"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
}
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
...
...
@@ -3204,9 +3218,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
if
(
chainStr
.
find
(
paramName
+
"1"
)
!=
chainStr
.
npos
||
chainStr
.
find
(
paramName
+
"2"
)
!=
chainStr
.
npos
)
parameters
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDeriv
s
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDeriv
Chain
->
getBuffers
().
size
();
i
++
)
{
if
(
needChainForValue
[
i
])
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDeriv
s
->
getBuffers
()[
i
];
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDeriv
Chain
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"dEdV"
+
cu
.
intToString
(
i
+
1
);
parameters
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
...
...
@@ -3352,6 +3366,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
perParticleEnergyArgs
.
push_back
(
&
computedValues
->
getBuffers
()[
i
].
getMemory
());
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
perParticleEnergyArgs
.
push_back
(
&
energyDerivs
->
getBuffers
()[
i
].
getMemory
());
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivChain
->
getBuffers
().
size
();
i
++
)
perParticleEnergyArgs
.
push_back
(
&
energyDerivChain
->
getBuffers
()[
i
].
getMemory
());
perParticleEnergyArgs
.
push_back
(
&
longEnergyDerivs
->
getDevicePointer
());
if
(
tabulatedFunctionParams
!=
NULL
)
{
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
19d2885a
...
...
@@ -90,6 +90,7 @@ CudaPlatform::CudaPlatform() {
platformProperties
.
push_back
(
CudaUseCpuPme
());
platformProperties
.
push_back
(
CudaCompiler
());
platformProperties
.
push_back
(
CudaTempDirectory
());
platformProperties
.
push_back
(
CudaHostCompiler
());
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
...
...
@@ -114,6 +115,8 @@ CudaPlatform::CudaPlatform() {
string
tmp
=
(
tmpdir
==
NULL
?
string
(
P_tmpdir
)
:
string
(
tmpdir
));
setPropertyDefaultValue
(
CudaTempDirectory
(),
tmp
);
#endif
char
*
hostCompiler
=
getenv
(
"CUDA_HOST_COMPILER"
);
setPropertyDefaultValue
(
CudaHostCompiler
(),
(
hostCompiler
==
NULL
?
""
:
string
(
hostCompiler
)));
}
double
CudaPlatform
::
getSpeed
()
const
{
...
...
@@ -149,6 +152,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue
(
CudaCompiler
())
:
properties
.
find
(
CudaCompiler
())
->
second
);
const
string
&
tempPropValue
=
(
properties
.
find
(
CudaTempDirectory
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaTempDirectory
())
:
properties
.
find
(
CudaTempDirectory
())
->
second
);
const
string
&
hostCompilerPropValue
=
(
properties
.
find
(
CudaHostCompiler
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaHostCompiler
())
:
properties
.
find
(
CudaHostCompiler
())
->
second
);
transform
(
blockingPropValue
.
begin
(),
blockingPropValue
.
end
(),
blockingPropValue
.
begin
(),
::
tolower
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
...
...
@@ -156,7 +161,7 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
cpuPmePropValue
=
"false"
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
));
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
));
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -165,7 +170,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
size_t
searchPos
=
0
,
nextPos
;
...
...
@@ -174,15 +179,24 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
searchPos
=
nextPos
+
1
;
}
devices
.
push_back
(
deviceIndexProperty
.
substr
(
searchPos
));
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
if
(
devices
[
i
].
length
()
>
0
)
{
unsigned
int
deviceIndex
;
stringstream
(
devices
[
i
])
>>
deviceIndex
;
contexts
.
push_back
(
new
CudaContext
(
system
,
deviceIndex
,
blocking
,
precisionProperty
,
compilerProperty
,
tempProperty
,
*
this
));
try
{
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
if
(
devices
[
i
].
length
()
>
0
)
{
unsigned
int
deviceIndex
;
stringstream
(
devices
[
i
])
>>
deviceIndex
;
contexts
.
push_back
(
new
CudaContext
(
system
,
deviceIndex
,
blocking
,
precisionProperty
,
compilerProperty
,
tempProperty
,
hostCompilerProperty
,
*
this
));
}
}
if
(
contexts
.
size
()
==
0
)
contexts
.
push_back
(
new
CudaContext
(
system
,
-
1
,
blocking
,
precisionProperty
,
compilerProperty
,
tempProperty
,
hostCompilerProperty
,
*
this
));
}
catch
(...)
{
// If an exception was thrown, do our best to clean up memory.
for
(
int
i
=
0
;
i
<
(
int
)
contexts
.
size
();
i
++
)
delete
contexts
[
i
];
throw
;
}
if
(
contexts
.
size
()
==
0
)
contexts
.
push_back
(
new
CudaContext
(
system
,
-
1
,
blocking
,
precisionProperty
,
compilerProperty
,
tempProperty
,
*
this
));
stringstream
deviceIndex
,
deviceName
;
for
(
int
i
=
0
;
i
<
(
int
)
contexts
.
size
();
i
++
)
{
if
(
i
>
0
)
{
...
...
@@ -202,6 +216,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues
[
CudaPlatform
::
CudaUseCpuPme
()]
=
useCpuPme
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaCompiler
()]
=
compilerProperty
;
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
propertyValues
[
CudaPlatform
::
CudaHostCompiler
()]
=
hostCompilerProperty
;
contextEnergy
.
resize
(
contexts
.
size
());
// Determine whether peer-to-peer copying is supported, and enable it if so.
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
19d2885a
...
...
@@ -65,7 +65,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
j
;
real
dEdR
=
0
;
...
...
@@ -117,7 +117,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
tj
;
real
dEdR
=
0
;
...
...
@@ -268,7 +268,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
atomIndices
[
tbx
+
tj
];
real
dEdR
=
0
;
...
...
@@ -313,7 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
atomIndices
[
tbx
+
tj
];
real
dEdR
=
0
;
...
...
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
19d2885a
...
...
@@ -60,7 +60,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
j
;
real
tempValue1
=
0
;
...
...
@@ -108,7 +108,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
tj
;
real
tempValue1
=
0
;
...
...
@@ -241,7 +241,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
atomIndices
[
tbx
+
tj
];
real
tempValue1
=
0
;
...
...
@@ -275,7 +275,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
atomIndices
[
tbx
+
tj
];
real
tempValue1
=
0
;
...
...
platforms/cuda/src/kernels/customNonbondedGroups.cu
View file @
19d2885a
...
...
@@ -58,7 +58,7 @@ extern "C" __global__ void computeInteractionGroups(
if
(
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
real
dEdR
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
...
...
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
19d2885a
...
...
@@ -116,7 +116,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
if
(
atom1
<
NUM_ATOMS
&&
y
*
TILE_SIZE
+
j
<
NUM_ATOMS
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
float2
params2
=
make_float2
(
localData
[
tbx
+
j
].
radius
,
localData
[
tbx
+
j
].
scaledRadius
);
real
rScaledRadiusJ
=
r
+
params2
.
y
;
if
((
j
!=
tgx
)
&&
(
params1
.
x
<
rScaledRadiusJ
))
{
...
...
@@ -163,7 +163,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
if
(
atom1
<
NUM_ATOMS
&&
y
*
TILE_SIZE
+
tj
<
NUM_ATOMS
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
float2
params2
=
make_float2
(
localData
[
tbx
+
tj
].
radius
,
localData
[
tbx
+
tj
].
scaledRadius
);
real
rScaledRadiusJ
=
r
+
params2
.
y
;
if
(
params1
.
x
<
rScaledRadiusJ
)
{
...
...
@@ -305,7 +305,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
int
atom2
=
atomIndices
[
tbx
+
tj
];
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
r2
<
CUTOFF_SQUARED
)
{
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
float2
params2
=
make_float2
(
localData
[
tbx
+
tj
].
radius
,
localData
[
tbx
+
tj
].
scaledRadius
);
real
rScaledRadiusJ
=
r
+
params2
.
y
;
if
(
params1
.
x
<
rScaledRadiusJ
)
{
...
...
@@ -355,7 +355,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
float2
params2
=
make_float2
(
localData
[
tbx
+
tj
].
radius
,
localData
[
tbx
+
tj
].
scaledRadius
);
real
rScaledRadiusJ
=
r
+
params2
.
y
;
if
(
params1
.
x
<
rScaledRadiusJ
)
{
...
...
@@ -461,7 +461,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
real
bornRadius2
=
localData
[
tbx
+
j
].
bornRadius
;
real
alpha2_ij
=
bornRadius1
*
bornRadius2
;
real
D_ij
=
r2
*
RECIP
(
4.0
f
*
alpha2_ij
);
...
...
@@ -518,7 +518,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
real
bornRadius2
=
localData
[
tbx
+
tj
].
bornRadius
;
real
alpha2_ij
=
bornRadius1
*
bornRadius2
;
real
D_ij
=
r2
*
RECIP
(
4.0
f
*
alpha2_ij
);
...
...
@@ -672,7 +672,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
real
bornRadius2
=
localData
[
tbx
+
tj
].
bornRadius
;
real
alpha2_ij
=
bornRadius1
*
bornRadius2
;
real
D_ij
=
r2
*
RECIP
(
4.0
f
*
alpha2_ij
);
...
...
@@ -723,7 +723,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
real
bornRadius2
=
localData
[
tbx
+
tj
].
bornRadius
;
real
alpha2_ij
=
bornRadius1
*
bornRadius2
;
real
D_ij
=
r2
*
RECIP
(
4.0
f
*
alpha2_ij
);
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
19d2885a
...
...
@@ -161,7 +161,7 @@ extern "C" __global__ void computeNonbonded(
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
j
;
#ifdef USE_SYMMETRIC
...
...
@@ -232,7 +232,7 @@ extern "C" __global__ void computeNonbonded(
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
tj
;
#ifdef USE_SYMMETRIC
...
...
@@ -433,7 +433,7 @@ extern "C" __global__ void computeNonbonded(
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
atomIndices
[
tbx
+
tj
];
#ifdef USE_SYMMETRIC
...
...
@@ -507,7 +507,7 @@ extern "C" __global__ void computeNonbonded(
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
)
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
atomIndices
[
tbx
+
tj
];
#ifdef USE_SYMMETRIC
...
...
platforms/cuda/tests/CMakeLists.txt
View file @
19d2885a
...
...
@@ -25,6 +25,11 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_TARGET
}
)
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ELSE
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ENDIF
(
APPLE
)
IF
(
${
TEST_ROOT
}
STREQUAL
"TestCUDAGBSAOBCForce2"
)
...
...
platforms/cuda/tests/TestCudaCustomGBForce.cpp
View file @
19d2885a
...
...
@@ -248,7 +248,7 @@ void testMembrane() {
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
();
++
i
)
norm
+=
forces
[
i
].
dot
(
forces
[
i
]);
norm
=
std
::
sqrt
(
norm
);
const
double
stepSize
=
1e-
3
;
const
double
stepSize
=
1e-
2
;
double
step
=
0.5
*
stepSize
/
norm
;
vector
<
Vec3
>
positions2
(
numParticles
),
positions3
(
numParticles
);
for
(
int
i
=
0
;
i
<
(
int
)
positions
.
size
();
++
i
)
{
...
...
platforms/cuda/tests/TestCudaGBSAOBCForce.cpp
View file @
19d2885a
...
...
@@ -71,7 +71,7 @@ void testSingleParticle() {
double
bornRadius
=
0.15
-
0.009
;
// dielectric offset
double
eps0
=
EPSILON0
;
double
bornEnergy
=
(
-
0.5
*
0.5
/
(
8
*
PI_M
*
eps0
))
*
(
1.0
/
gbsa
->
getSoluteDielectric
()
-
1.0
/
gbsa
->
getSolventDielectric
())
/
bornRadius
;
double
extendedRadius
=
bornRadius
+
0.14
;
// probe radius
double
extendedRadius
=
0.15
+
0.14
;
// probe radius
double
nonpolarEnergy
=
CAL2JOULE
*
PI_M
*
0.0216
*
(
10
*
extendedRadius
)
*
(
10
*
extendedRadius
)
*
std
::
pow
(
0.15
/
bornRadius
,
6.0
);
// Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp
ASSERT_EQUAL_TOL
((
bornEnergy
+
nonpolarEnergy
),
state
.
getPotentialEnergy
(),
0.01
);
...
...
platforms/cuda/tests/TestCudaRandom.cpp
View file @
19d2885a
...
...
@@ -55,7 +55,8 @@ void testGaussian() {
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
system
.
addParticle
(
1.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()));
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()));
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/cuda/tests/TestCudaSort.cpp
View file @
19d2885a
...
...
@@ -65,7 +65,8 @@ void verifySorting(vector<float> array) {
System
system
;
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()));
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()));
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
CudaArray
data
(
context
,
array
.
size
(),
4
,
"sortData"
);
...
...
platforms/opencl/CMakeLists.txt
View file @
19d2885a
...
...
@@ -14,10 +14,6 @@
# libOpenMMOpenCL_static[_d].a
#----------------------------------------------------
IF
(
APPLE
)
SET
(
CMAKE_OSX_DEPLOYMENT_TARGET
"10.6"
)
ENDIF
(
APPLE
)
set
(
OPENMM_BUILD_OPENCL_TESTS TRUE CACHE BOOL
"Whether to build OpenCL test cases"
)
if
(
OPENMM_BUILD_OPENCL_TESTS
)
SUBDIRS
(
tests
)
...
...
@@ -39,9 +35,9 @@ SET(STATIC_TARGET ${OPENMMOPENCL_LIBRARY_NAME}_static)
# Ensure that debug libraries have "_d" appended to their names.
# CMake gets this right on Windows automatically with this definition.
IF
(
${
CMAKE_GENERATOR
}
MATCHES
"Visual Studio"
)
IF
(
MSVC
)
SET
(
CMAKE_DEBUG_POSTFIX
"_d"
CACHE INTERNAL
""
FORCE
)
ENDIF
(
${
CMAKE_GENERATOR
}
MATCHES
"Visual Studio"
)
ENDIF
(
MSVC
)
# But on Unix or Cygwin we have to add the suffix manually
IF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
...
...
Prev
1
2
3
4
5
6
7
8
9
10
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment