Unverified Commit edbc8407 authored by peastman's avatar peastman Committed by GitHub
Browse files

Common compute framework to unify CUDA and OpenCL code (#2488)

* Began creating common compute framework to unify code between CUDA and OpenCL

* Began OpenCL implementation of common compute framework

* Common implementation of CMMotionRemover

* CUDA implementation of common compute interface

* Converted HarmonicBondForce to common compute API

* Converted standard bonded forces to common compute API

* Converted ExpressionUtilities to common compute API

* Created ComputeParameterSet

* Converted custom bonded forces to common compute API

* Converted CustomCentroidBondForce to common compute API

* Converted CustomManyParticleForce to common compute API

* Moved lots of duplicate code from CudaContext and OpenCLContext to ComputeContext

* Converted GayBerneForce to common compute API

* Removed obsolete kernels

* Converted verlet integrators to common compute API

* Converted Langevin and Brownian integrators to common compute API

* Converted CustomIntegrator to common compute API

* Converted CustomNonbondedForce to common compute API

* Removed uses of a deprecated API

* Fixed failing test cases

* Converted GBSAOBCForce to common compute API

* Began converting CustomGBForce to common compute API

* Finished converting CustomGBForce to common compute API

* Merged duplicated code in CudaIntegrationUtilities and OpenCLIntegrationUtilities

* Converted RMSDForce and AndersenThermostat to common compute API

* Converted CustomHbondForce to common compute API

* Merged scripts for encoding kernel sources

* Converted Drude plugin to common compute API

* Fixed errors in CMake scripts

* Attempt at fixing errors on Windows

* Added discussion of common compute API to developer guide

* Added Windows export macro for common classes

* Fixed error in CMMotionRemover

* Ubdated travis to newer Ubuntu version

* Fixed errors on CPU OpenCL

* Fixed Windows linking errors

* Added missing pragma for 32 bit atomics

* Replaced long long with mm_long

* More fixes to Windows linking

* Bug fix
parent 38beeefe
float2 drudeParams = PARAMS[index];
real4 force1 = 0;
real4 force2 = 0;
real4 force3 = 0;
real4 force4 = 0;
// First pair.
real4 delta = (real4) (pos1.xyz-pos3.xyz, 0);
real rInv = RSQRT(dot(delta, delta));
real r = RECIP(rInv);
real u = drudeParams.x*r;
real screening = 1-(1+0.5f*u)*EXP(-u);
real pairEnergy = drudeParams.y*screening*rInv;
energy += pairEnergy;
real4 f = delta*(drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
force1 += f;
force3 -= f;
// Second pair.
delta = (real4) (pos1.xyz-pos4.xyz, 0);
rInv = RSQRT(dot(delta, delta));
r = RECIP(rInv);
u = drudeParams.x*r;
screening = 1-(1+0.5f*u)*EXP(-u);
pairEnergy = -drudeParams.y*screening*rInv;
energy += pairEnergy;
f = delta*(-drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
force1 += f;
force4 -= f;
// Third pair.
delta = (real4) (pos2.xyz-pos3.xyz, 0);
rInv = RSQRT(dot(delta, delta));
r = RECIP(rInv);
u = drudeParams.x*r;
screening = 1-(1+0.5f*u)*EXP(-u);
pairEnergy = -drudeParams.y*screening*rInv;
energy += pairEnergy;
f = delta*(-drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
force2 += f;
force3 -= f;
// Fourth pair.
delta = (real4) (pos2.xyz-pos4.xyz, 0);
rInv = RSQRT(dot(delta, delta));
r = RECIP(rInv);
u = drudeParams.x*r;
screening = 1-(1+0.5f*u)*EXP(-u);
pairEnergy = drudeParams.y*screening*rInv;
energy += pairEnergy;
f = delta*(drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
force2 += f;
force4 -= f;
real4 delta = (real4) (pos1.xyz-pos2.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float4 drudeParams = PARAMS[index];
float k1 = drudeParams.x;
float k2 = drudeParams.y;
float k3 = drudeParams.z;
// Compute the isotropic force.
energy += 0.5f*k3*r2;
real4 force1 = -delta*k3;
real4 force2 = delta*k3;
real4 force3 = 0;
real4 force4 = 0;
real4 force5 = 0;
// Compute the first anisotropic force.
if (k1 != 0) {
real4 dir = (real4) (pos2.xyz-pos3.xyz, 0);
real invDist = RSQRT(dot(dir, dir));
dir *= invDist;
real rprime = dot(dir, delta);
energy += 0.5f*k1*rprime*rprime;
real4 f1 = dir*(k1*rprime);
real4 f2 = (delta-dir*rprime)*(k1*rprime*invDist);
force1 -= f1;
force2 += f1-f2;
force3 += f2;
}
// Compute the second anisotropic force.
if (k2 != 0) {
real4 dir = (real4) (pos4.xyz-pos5.xyz, 0);
real invDist = RSQRT(dot(dir, dir));
dir *= invDist;
real rprime = dot(dir, delta);
energy += 0.5f*k2*rprime*rprime;
real4 f1 = dir*(k2*rprime);
real4 f2 = (delta-dir*rprime)*(k2*rprime*invDist);
force1 -= f1;
force2 += f1;
force4 -= f2;
force5 += f2;
}
......@@ -63,27 +63,28 @@ INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/include)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/cuda/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/common/include)
# Set variables needed for encoding kernel sources into a C++ class
SET(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(CUDA_SOURCE_CLASS CudaRpmdKernelSources)
SET(CUDA_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.cpp)
SET(CUDA_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H})
SET(KERNEL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(KERNEL_SOURCE_CLASS CudaRpmdKernelSources)
SET(KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${KERNEL_SOURCE_CLASS}.cpp)
SET(KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${KERNEL_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${KERNELS_CPP} ${KERNELS_H})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
# Create the library
INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE})
FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
FILE(GLOB CUDA_KERNELS ${KERNEL_SOURCE_DIR}/kernels/*.cu)
ADD_CUSTOM_COMMAND(OUTPUT ${KERNELS_CPP} ${KERNELS_H}
COMMAND ${CMAKE_COMMAND}
ARGS -D CUDA_SOURCE_DIR=${CUDA_SOURCE_DIR} -D CUDA_KERNELS_CPP=${CUDA_KERNELS_CPP} -D CUDA_KERNELS_H=${CUDA_KERNELS_H} -D CUDA_SOURCE_CLASS=${CUDA_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/cuda/EncodeCUDAFiles.cmake
ARGS -D KERNEL_SOURCE_DIR=${KERNEL_SOURCE_DIR} -D KERNELS_CPP=${KERNELS_CPP} -D KERNELS_H=${KERNELS_H} -D KERNEL_SOURCE_CLASS=${KERNEL_SOURCE_CLASS} -D KERNEL_FILE_EXTENSION=cu -P ${CMAKE_SOURCE_DIR}/cmake_modules/EncodeKernelFiles.cmake
DEPENDS ${CUDA_KERNELS}
)
SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE)
SET_SOURCE_FILES_PROPERTIES(${KERNELS_CPP} ${KERNELS_H} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${PTHREADS_LIB})
......
......@@ -39,7 +39,7 @@ namespace OpenMM {
class CudaRpmdKernelSources {
public:
@CUDA_FILE_DECLARATIONS@
@KERNEL_FILE_DECLARATIONS@
};
} // namespace OpenMM
......
......@@ -307,7 +307,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
double4 periodicBoxSize = cu.getPeriodicBoxSize();
vector<Vec3> offsetPos(numParticles);
for (int i = 0; i < numParticles; ++i) {
int4 offset = cu.getPosCellOffsets()[i];
mm_int4 offset = cu.getPosCellOffsets()[i];
offsetPos[order[i]] = pos[order[i]] + Vec3(offset.x*periodicBoxSize.x, offset.y*periodicBoxSize.y, offset.z*periodicBoxSize.z);
}
......
......@@ -62,27 +62,28 @@ ENDFOREACH(subdir)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/include)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/src)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/common/include)
# Set variables needed for encoding kernel sources into a C++ class
SET(CL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(CL_SOURCE_CLASS OpenCLRpmdKernelSources)
SET(CL_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.cpp)
SET(CL_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${CL_KERNELS_CPP} ${CL_KERNELS_H})
SET(KERNEL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
SET(KERNEL_SOURCE_CLASS OpenCLRpmdKernelSources)
SET(KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${KERNEL_SOURCE_CLASS}.cpp)
SET(KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${KERNEL_SOURCE_CLASS}.h)
SET(SOURCE_FILES ${SOURCE_FILES} ${KERNELS_CPP} ${KERNELS_H})
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
# Create the library
INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
ADD_CUSTOM_COMMAND(OUTPUT ${CL_KERNELS_CPP} ${CL_KERNELS_H}
FILE(GLOB OPENCL_KERNELS ${KERNEL_SOURCE_DIR}/kernels/*.cl)
ADD_CUSTOM_COMMAND(OUTPUT ${KERNELS_CPP} ${KERNELS_H}
COMMAND ${CMAKE_COMMAND}
ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -D CL_SOURCE_CLASS=${CL_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/opencl/EncodeCLFiles.cmake
ARGS -D KERNEL_SOURCE_DIR=${KERNEL_SOURCE_DIR} -D KERNELS_CPP=${KERNELS_CPP} -D KERNELS_H=${KERNELS_H} -D KERNEL_SOURCE_CLASS=${KERNEL_SOURCE_CLASS} -D KERNEL_FILE_EXTENSION=cl -P ${CMAKE_SOURCE_DIR}/cmake_modules/EncodeKernelFiles.cmake
DEPENDS ${OPENCL_KERNELS}
)
SET_SOURCE_FILES_PROPERTIES(${CL_KERNELS_CPP} ${CL_KERNELS_H} PROPERTIES GENERATED TRUE)
SET_SOURCE_FILES_PROPERTIES(${KERNELS_CPP} ${KERNELS_H} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${OPENCL_LIBRARIES} ${PTHREADS_LIB})
......
......@@ -39,7 +39,7 @@ namespace OpenMM {
class OpenCLRpmdKernelSources {
public:
@CL_FILE_DECLARATIONS@
@KERNEL_FILE_DECLARATIONS@
};
} // namespace OpenMM
......
......@@ -163,12 +163,12 @@ void testRandomSeed() {
Context context(system, integrator, platform);
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
integrator.step(20);
State state1 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
integrator.step(20);
State state2 = context.getState(State::Positions);
// Try twice with a different random seed.
......@@ -177,12 +177,12 @@ void testRandomSeed() {
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
integrator.step(20);
State state3 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
integrator.step(20);
State state4 = context.getState(State::Positions);
// Compare the results.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment