Commit 7a36f461 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

dded GB/VI to Cuda platform

Free energy plugin added
Plugin will not run w/ Obc or GB/VI forces unless line 2004 of gpu.cpp (gpu->sim.totalNonbondOutputBuffers  = 2*gpu->sim.nonbondOutputBuffers;) is commented in -- working on removing this constraint
Also unit tests for GB/VI currently fail 
parent 43ebedfb
# For more information, please see: http://software.sci.utah.edu
#
# The MIT License
#
# Copyright (c) 2007
# Scientific Computing and Imaging Institute, University of Utah
#
# License for the specific language governing rights and limitations under
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# Make2cmake CMake Script
# Abe Stephens and James Bigler
# (c) 2007 Scientific Computing and Imaging Institute, University of Utah
# Note that the REGEX expressions may need to be tweaked for different dependency generators.
FILE(READ ${input_file} depend_text)
IF (${depend_text} MATCHES ".+")
# MESSAGE("FOUND DEPENDS")
# Remember, four backslashes is escaped to one backslash in the string.
STRING(REGEX REPLACE "\\\\ " " " depend_text ${depend_text})
# This works for the nvcc -M generated dependency files.
STRING(REGEX REPLACE "^.* : " "" depend_text ${depend_text})
STRING(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text})
FOREACH(file ${depend_text})
STRING(REGEX REPLACE "^ +" "" file ${file})
# IF (EXISTS ${file})
# MESSAGE("DEPEND = ${file}")
# ELSE (EXISTS ${file})
# MESSAGE("ERROR = ${file}")
# ENDIF(EXISTS ${file})
SET(cuda_nvcc_depend "${cuda_nvcc_depend} \"${file}\"\n")
ENDFOREACH(file)
ELSE(${depend_text} MATCHES ".+")
# MESSAGE("FOUND NO DEPENDS")
ENDIF(${depend_text} MATCHES ".+")
FILE(WRITE ${output_file} "# Generated by: make2cmake.cmake\nSET(CUDA_NVCC_DEPEND\n ${cuda_nvcc_depend})\n\n")
# For more information, please see: http://software.sci.utah.edu
#
# The MIT License
#
# Copyright (c) 2007
# Scientific Computing and Imaging Institute, University of Utah
#
# License for the specific language governing rights and limitations under
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# .cubin Parsing CMake Script
# Abe Stephens
# (c) 2007 Scientific Computing and Imaging Institute, University of Utah
FILE(READ ${input_file} file_text)
IF (${file_text} MATCHES ".+")
# Remember, four backslashes is escaped to one backslash in the string.
STRING(REGEX REPLACE ";" "\\\\;" file_text ${file_text})
STRING(REGEX REPLACE "\ncode" ";code" file_text ${file_text})
LIST(LENGTH file_text len)
FOREACH(line ${file_text})
# Only look at "code { }" blocks.
IF(line MATCHES "^code")
# Break into individual lines.
STRING(REGEX REPLACE "\n" ";" line ${line})
FOREACH(entry ${line})
# Extract kernel names.
IF (${entry} MATCHES "[^g]name = ([^ ]+)")
STRING(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry})
# Check to see if the kernel name starts with "_"
SET(skip FALSE)
# IF (${entry} MATCHES "^_")
# Skip the rest of this block.
# MESSAGE("Skipping ${entry}")
# SET(skip TRUE)
# ELSE (${entry} MATCHES "^_")
MESSAGE("Kernel: ${entry}")
# ENDIF (${entry} MATCHES "^_")
ENDIF(${entry} MATCHES "[^g]name = ([^ ]+)")
# Skip the rest of the block if necessary
IF(NOT skip)
# Registers
IF (${entry} MATCHES "reg = ([^ ]+)")
STRING(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry})
MESSAGE("Registers: ${entry}")
ENDIF(${entry} MATCHES "reg = ([^ ]+)")
# Local memory
IF (${entry} MATCHES "lmem = ([^ ]+)")
STRING(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry})
MESSAGE("Local: ${entry}")
ENDIF(${entry} MATCHES "lmem = ([^ ]+)")
# Shared memory
IF (${entry} MATCHES "smem = ([^ ]+)")
STRING(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry})
MESSAGE("Shared: ${entry}")
ENDIF(${entry} MATCHES "smem = ([^ ]+)")
IF (${entry} MATCHES "^}")
MESSAGE("")
ENDIF(${entry} MATCHES "^}")
ENDIF(NOT skip)
ENDFOREACH(entry)
ENDIF(line MATCHES "^code")
ENDFOREACH(line)
ELSE(${file_text} MATCHES ".+")
# MESSAGE("FOUND NO DEPENDS")
ENDIF(${file_text} MATCHES ".+")
#ifndef OPENMM_FREE_ENERGY_CUDA_KERNEL_FACTORY_H_
#define OPENMM_FREE_ENERGY_CUDA_KERNEL_FACTORY_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/KernelFactory.h"
namespace OpenMM {
/**
* This KernelFactory creates all kernels for CudaFreeEnergyPlatform.
*/
class CudaFreeEnergyKernelFactory : public KernelFactory {
public:
KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
};
} // namespace OpenMM
#endif /*OPENMM_FREE_ENERGY_CUDA_KERNEL_FACTORY_H_*/
#
# Include CUDA related files.
#
# ----------------------------------------------------------------------------
# logging
SET(LOG TRUE)
IF(LOG)
SET(LOG_FILE "CMakeLog.txt" )
FILE( WRITE ${LOG_FILE} "In freeEnergy/platforms/cuda/sharedTarget Cmake\n")
# FILE( APPEND ${LOG_FILE} "BROOK_LIB_PATH=${BROOK_LIB_PATH}\n")
ENDIF(LOG)
IF(LOG)
MACRO(LOG_DIR LOG_FILE DIR_LIST )
FILE( APPEND ${LOG_FILE} "\n${DIR_LIST}\n")
FOREACH(currentFile ${ARGN})
FILE( APPEND ${LOG_FILE} " ${currentFile}\n" )
ENDFOREACH(currentFile)
ENDMACRO(LOG_DIR)
ENDIF(LOG)
# ----------------------------------------------------------------------------
SET(OPENMM_BUILD_FREE_ENERGY_PATH ${CMAKE_SOURCE_DIR}/plugins/freeEnergy)
# ----------------------------------------------------------------------------
IF(LOG)
LOG_DIR( ${LOG_FILE} "Pre OPENMM_SOURCE_SUBDIRS" ${OPENMM_SOURCE_SUBDIRS} )
LOG_DIR( ${LOG_FILE} "Pre OPENMM_FREE_ENERGY_SOURCE_SUBDIRS " ${OPENMM_FREE_ENERGY_SOURCE_SUBDIRS} )
LOG_DIR( ${LOG_FILE} "Pre SOURCE_FILES" ${SOURCE_FILES} )
ENDIF(LOG)
## ----------------------------------------------------------------------------
SET(CUDA_NVCC_BUILD_FLAGS)
INCLUDE(${FINDCUDA_DIR}/FindCuda.cmake)
INCLUDE_DIRECTORIES(${CUDA_INCLUDE})
LINK_DIRECTORIES(${CUDA_TARGET_LINK})
FOREACH(subdir ${OPENMM_FREE_ENERGY_SOURCE_SUBDIRS})
FILE(GLOB src_files ${OPENMM_BUILD_FREE_ENERGY_PATH}/platforms/cuda/${subdir}/src/*.cu ${OPENMM_BUILD_FREE_ENERGY_PATH}/platforms/cuda/src/*/*.cu)
FOREACH(file ${src_files})
FILE(RELATIVE_PATH file ${OPENMM_BUILD_FREE_ENERGY_PATH}/platforms/cuda ${file})
SET(SOURCE_FILES ${SOURCE_FILES} ${file}) #append
ENDFOREACH(file)
CUDA_INCLUDE_DIRECTORIES(BEFORE ${OPENMM_BUILD_FREE_ENERGY_PATH}/platforms/cuda/../${subdir}/include)
ENDFOREACH(subdir)
# ----------------------------------------------------------------------------
IF(LOG)
LOG_DIR( ${LOG_FILE} "OPENMM_BUILD_FREE_ENERGY_PATH" ${OPENMM_BUILD_FREE_ENERGY_PATH} )
FILE( APPEND ${LOG_FILE} "OPENMM_BUILD_FREE_ENERGY_PATH=${OPENMM_BUILD_FREE_ENERGY_PATH}\n")
LOG_DIR( ${LOG_FILE} "OPENMM_SOURCE_SUBDIRS" ${OPENMM_SOURCE_SUBDIRS} )
LOG_DIR( ${LOG_FILE} "CMAKE_SOURCE_DIR" ${CMAKE_SOURCE_DIR} )
LOG_DIR( ${LOG_FILE} "CUDA_INCLUDE" ${CUDA_INCLUDE} )
LOG_DIR( ${LOG_FILE} "CUDA_TARGET_LINK" ${CUDA_TARGET_LINK} )
LOG_DIR( ${LOG_FILE} "SHARED_TARGET" ${SHARED_TARGET} )
LOG_DIR( ${LOG_FILE} "OPENMM_DIR" ${OPENMM_DIR} )
LOG_DIR( ${LOG_FILE} "SOURCE_FILES" ${SOURCE_FILES} )
ENDIF(LOG)
## ----------------------------------------------------------------------------
# CUDA_INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/jama/include)
CUDA_INCLUDE_DIRECTORIES(${OPENMM_BUILD_FREE_ENERGY_PATH}/platforms/cuda/../src
${OPENMM_DIR}/platforms/cuda/src
${OPENMM_DIR}/platforms/cuda/src/kernels
${OPENMM_DIR}/openmmapi/include )
CUDA_ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${OPENMM_LIBRARY_NAME}_d optimized ${OPENMM_LIBRARY_NAME} )
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaFreeEnergyKernelFactory.h"
#include "CudaFreeEnergyKernels.h"
#include "openmm/freeEnergyKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
#if defined(OPENMM_BUILDING_SHARED_LIBRARY)
#if defined(WIN32)
#include <windows.h>
extern "C" void initOpenMMCudaFreeEnergyPlugin();
BOOL WINAPI DllMain(HANDLE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) {
if (ul_reason_for_call == DLL_PROCESS_ATTACH)
initOpenMMCudaFreeEnergyPlugin();
return TRUE;
}
#else
extern "C" void __attribute__((constructor)) initOpenMMCudaFreeEnergyPlugin();
#endif
#endif
using namespace OpenMM;
extern "C" void initOpenMMCudaFreeEnergyPlugin() {
if ( gpuIsAvailable() ){
CudaPlatform* cudaPlatform = new CudaPlatform();
CudaFreeEnergyKernelFactory* factory = new CudaFreeEnergyKernelFactory();
cudaPlatform->registerKernelFactory(CalcNonbondedSoftcoreForceKernel::Name(), factory);
cudaPlatform->registerKernelFactory(CalcGBSAOBCSoftcoreForceKernel::Name(), factory);
cudaPlatform->registerKernelFactory(CalcGBVISoftcoreForceKernel::Name(), factory);
Platform::registerPlatform(cudaPlatform);
}
}
KernelImpl* CudaFreeEnergyKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
CudaPlatform::PlatformData& data = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());
if (name == CalcNonbondedSoftcoreForceKernel::Name())
return new CudaFreeEnergyCalcNonbondedSoftcoreForceKernel(name, platform, data, context.getSystem());
if (name == CalcGBSAOBCSoftcoreForceKernel::Name())
return new CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel(name, platform, data);
if (name == CalcGBVISoftcoreForceKernel::Name())
return new CudaFreeEnergyCalcGBVISoftcoreForceKernel(name, platform, data);
throw OpenMMException( (std::string("Tried to create kernel with illegal kernel name '") + name + "'").c_str() );
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaFreeEnergyKernels.h"
#include "openmm/Context.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/ContextImpl.h"
#include "kernels/gputypes.h"
#include "kernels/cudaKernels.h"
#include "kernels/GpuFreeEnergyCudaKernels.h"
#include "kernels/GpuLJ14Softcore.h"
#include <cmath>
using namespace OpenMM;
#include <map>
typedef std::map< std::string, int > MapStringInt;
typedef MapStringInt::iterator MapStringIntI;
typedef MapStringInt::const_iterator MapStringIntCI;
// force names
const std::string HARMONIC_BOND_FORCE = "HarmonicBond";
const std::string HARMONIC_ANGLE_FORCE = "HarmonicBond";
const std::string PERIODIC_TORSION_FORCE = "PeriodicTorsion";
const std::string RB_TORSION_FORCE = "RbTorsion";
const std::string NB_FORCE = "Nb";
const std::string NB_SOFTCORE_FORCE = "NbSoftcore";
const std::string NB_EXCEPTION_FORCE = "NbException";
const std::string NB_EXCEPTION_SOFTCORE_FORCE = "NbSoftcoreException";
const std::string GBSA_OBC_FORCE = "Obc";
const std::string GBSA_OBC_SOFTCORE_FORCE = "ObcSoftcore";
const std::string GBVI_FORCE = "GBVI";
const std::string GBVI_SOFTCORE_FORCE = "GBVISoftcore";
static void getForceMap(const System& system, MapStringInt& forceMap) {
// check forces and relevant parameters
for(int i = 0; i < system.getNumForces(); ++i) {
int hit = 0;
const Force& force = system.getForce(i);
// bond
if( !hit ){
try {
const HarmonicBondForce& harmonicBondForce = dynamic_cast<const HarmonicBondForce&>(force);
forceMap[HARMONIC_BOND_FORCE] = 1;
hit++;
} catch( std::bad_cast ){
}
}
// angle
if( !hit ){
try {
const HarmonicAngleForce& harmonicAngleForce = dynamic_cast<const HarmonicAngleForce&>(force);
forceMap[HARMONIC_ANGLE_FORCE] = 1;
hit++;
} catch( std::bad_cast ){
}
}
// PeriodicTorsionForce
if( !hit ){
try {
const PeriodicTorsionForce & periodicTorsionForce = dynamic_cast<const PeriodicTorsionForce&>(force);
forceMap[PERIODIC_TORSION_FORCE] = 1;
hit++;
} catch( std::bad_cast ){
}
}
// RBTorsionForce
if( !hit ){
try {
const RBTorsionForce& rBTorsionForce = dynamic_cast<const RBTorsionForce&>(force);
forceMap[RB_TORSION_FORCE] = 1;
hit++;
} catch( std::bad_cast ){
}
}
// nonbonded
if( !hit ){
try {
const NonbondedForce& nbForce = dynamic_cast<const NonbondedForce&>(force);
forceMap[NB_FORCE] = 1;
} catch( std::bad_cast ){
}
}
// nonbonded softcore
if( !hit ){
try {
const NonbondedSoftcoreForce& nbForce = dynamic_cast<const NonbondedSoftcoreForce&>(force);
forceMap[NB_SOFTCORE_FORCE] = 1;
} catch( std::bad_cast ){
}
}
// GBSA OBC
if( !hit ){
try {
const GBSAOBCForce& obcForce = dynamic_cast<const GBSAOBCForce&>(force);
forceMap[GBSA_OBC_FORCE] = 1;
hit++;
} catch( std::bad_cast ){
}
}
// GBSA OBC softcore
if( !hit ){
try {
const GBSAOBCSoftcoreForce& obcForce = dynamic_cast<const GBSAOBCSoftcoreForce&>(force);
forceMap[GBSA_OBC_SOFTCORE_FORCE] = 1;
hit++;
} catch( std::bad_cast ){
}
}
// GB/VI
if( !hit ){
try {
const GBVIForce& obcForce = dynamic_cast<const GBVIForce&>(force);
forceMap[GBVI_FORCE] = 1;
hit++;
} catch( std::bad_cast ){
}
}
// GB/VI softcore
if( !hit ){
try {
const GBVISoftcoreForce& gbviForce = dynamic_cast<const GBVISoftcoreForce&>(force);
forceMap[GBVI_SOFTCORE_FORCE] = 1;
hit++;
} catch( std::bad_cast ){
}
}
}
}
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::~CudaFreeEnergyCalcNonbondedSoftcoreForceKernel() {
if( log ){
(void) fprintf( log, "CudaFreeEnergyCalcNonbondedSoftcoreForceKernel destructor called.\n" );
(void) fflush( log );
}
delete gpuNonbondedSoftcore;
delete gpuLJ14Softcore;
}
void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::initialize(const System& system, const NonbondedSoftcoreForce& force) {
// ---------------------------------------------------------------------------------------
static const std::string methodName = "CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::initialize";
// ---------------------------------------------------------------------------------------
if( log ){
(void) fprintf( log, "%s called.\n", methodName.c_str() );
(void) fflush( log );
}
// check forces and relevant parameters
MapStringInt forceMap;
getForceMap( system, forceMap);
int softcore = 0;
if( forceMap.find( GBSA_OBC_FORCE ) != forceMap.end() ){
setIncludeGBSA( true );
}
if( forceMap.find( GBSA_OBC_SOFTCORE_FORCE ) != forceMap.end() ){
setIncludeGBSA( true );
softcore++;
}
if( forceMap.find( GBVI_FORCE ) != forceMap.end() ){
setIncludeGBVI( true );
}
if( forceMap.find( GBVI_SOFTCORE_FORCE ) != forceMap.end() ){
setIncludeGBVI( true );
softcore++;
}
if( forceMap.find( NB_SOFTCORE_FORCE ) != forceMap.end() ){
softcore++;
}
setIncludeSoftcore( softcore );
numParticles = force.getNumParticles();
_gpuContext* gpu = data.gpu;
// Identify which exceptions are 1-4 interactions.
std::vector<pair<int, int> > exclusions;
std::vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon, softcoreLJLambda;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon, softcoreLJLambda);
exclusions.push_back(pair<int, int>(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0)
exceptions.push_back(i);
}
// Initialize nonbonded interactions.
{
std::vector<int> particle(numParticles);
std::vector<float> c6(numParticles);
std::vector<float> c12(numParticles);
std::vector<float> q(numParticles);
std::vector<float> softcoreLJLambdaArray(numParticles);
std::vector<char> symbol;
std::vector<std::vector<int> > exclusionList(numParticles);
float minSoftcoreLJLambda = 1.0e+20f;
for (int i = 0; i < numParticles; i++) {
double charge, radius, depth, softcoreLJLambda;
force.getParticleParameters(i, charge, radius, depth, softcoreLJLambda);
particle[i] = i;
q[i] = static_cast<float>( charge );
c6[i] = static_cast<float>( (4*depth*pow(radius, 6.0)) );
c12[i] = static_cast<float>( (4*depth*pow(radius, 12.0)) );
softcoreLJLambdaArray[i] = static_cast<float>( softcoreLJLambda );
if( minSoftcoreLJLambda > softcoreLJLambda ){
minSoftcoreLJLambda = softcoreLJLambda;
}
exclusionList[i].push_back(i);
}
for (int i = 0; i < (int)exclusions.size(); i++) {
exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first);
}
Vec3 boxVectors[3];
system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
gpuSetPeriodicBoxSize(gpu, static_cast<float>(boxVectors[0][0] ), static_cast<float>(boxVectors[1][1] ), static_cast<float>(boxVectors[2][2] ));
CudaNonbondedMethod method = NO_CUTOFF;
if (force.getNonbondedMethod() != NonbondedSoftcoreForce::NoCutoff) {
gpuSetNonbondedCutoff(gpu, static_cast<float>(force.getCutoffDistance() ), force.getReactionFieldDielectric());
method = CUTOFF;
}
if (force.getNonbondedMethod() == NonbondedSoftcoreForce::CutoffPeriodic) {
method = PERIODIC;
}
if (force.getNonbondedMethod() == NonbondedSoftcoreForce::Ewald || force.getNonbondedMethod() == NonbondedSoftcoreForce::PME) {
double ewaldErrorTol = force.getEwaldErrorTolerance();
double alpha = (1.0/force.getCutoffDistance())*std::sqrt(-std::log(ewaldErrorTol));
double mx = boxVectors[0][0]/force.getCutoffDistance();
double my = boxVectors[1][1]/force.getCutoffDistance();
double mz = boxVectors[2][2]/force.getCutoffDistance();
double pi = 3.1415926535897932385;
int kmaxx = (int)std::ceil(-(mx/pi)*std::log(ewaldErrorTol));
int kmaxy = (int)std::ceil(-(my/pi)*std::log(ewaldErrorTol));
int kmaxz = (int)std::ceil(-(mz/pi)*std::log(ewaldErrorTol));
if (force.getNonbondedMethod() == NonbondedSoftcoreForce::Ewald) {
if (kmaxx%2 == 0)
kmaxx++;
if (kmaxy%2 == 0)
kmaxy++;
if (kmaxz%2 == 0)
kmaxz++;
gpuSetEwaldParameters(gpu, static_cast<float>( alpha ), kmaxx, kmaxy, kmaxz);
method = EWALD;
}
else {
int gridSizeX = kmaxx*3;
int gridSizeY = kmaxy*3;
int gridSizeZ = kmaxz*3;
gridSizeX = ((gridSizeX+3)/4)*4;
gridSizeY = ((gridSizeY+3)/4)*4;
gridSizeZ = ((gridSizeZ+3)/4)*4;
gpuSetPMEParameters(gpu, static_cast<float>( alpha ), gridSizeX, gridSizeY, gridSizeZ);
method = PARTICLE_MESH_EWALD;
}
}
data.nonbondedMethod = method;
// setup parameters
gpuNonbondedSoftcore = gpuSetNonbondedSoftcoreParameters(gpu, 138.935485f, particle, c6, c12, q,
softcoreLJLambdaArray, symbol, exclusionList, method);
// Compute the Ewald self energy.
data.ewaldSelfEnergy = 0.0;
if (force.getNonbondedMethod() == NonbondedSoftcoreForce::Ewald || force.getNonbondedMethod() == NonbondedSoftcoreForce::PME) {
double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
for (int i = 0; i < numParticles; i++)
data.ewaldSelfEnergy -= selfEnergyScale*q[i]*q[i];
}
}
// Initialize 1-4 nonbonded interactions.
{
numExceptions = exceptions.size();
std::vector<int> particle1(numExceptions);
std::vector<int> particle2(numExceptions);
std::vector<float> c6(numExceptions);
std::vector<float> c12(numExceptions);
std::vector<float> q1(numExceptions);
std::vector<float> q2(numExceptions);
std::vector<float> softcoreLJLambdaArray(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double charge, sig, eps, softcoreLJLambda;
force.getExceptionParameters(exceptions[i], particle1[i], particle2[i], charge, sig, eps, softcoreLJLambda);
c6[i] = static_cast<float>( (4*eps*pow(sig, 6.0)) );
c12[i] = static_cast<float>( (4*eps*pow(sig, 12.0)) );
q1[i] = static_cast<float>( charge );
q2[i] = 1.0f;
softcoreLJLambdaArray[i] = static_cast<float>( softcoreLJLambda );
}
gpuLJ14Softcore = gpuSetLJ14SoftcoreParameters(gpu, 138.935485f, 1.0f, particle1, particle2, c6, c12, q1, q2, softcoreLJLambdaArray);
}
}
void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces(ContextImpl& context) {
// ---------------------------------------------------------------------------------------
static const std::string methodName = "CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces";
// ---------------------------------------------------------------------------------------
_gpuContext* gpu = data.gpu;
// write array, ... address's to board
if( setSim == 0 ){
setSim++;
if( log ){
(void) fprintf( log, "%s Obc=%d GB/VI=%d exceptions=%d\n",
methodName.c_str(), getIncludeGBSA(), getIncludeGBVI(), getNumExceptions() );
(void) fflush( log );
}
SetCalculateLocalSoftcoreGpuSim( gpu );
SetCalculateCDLJSoftcoreGpuSim( gpu );
// (void) fprintf( log, "Calling SetCalculateLocalSoftcoreGpuSim\n" ); fflush( stderr );
// flip strides (unsure if this is needed)
#if 0
(void) fprintf( stderr, "flipping gpuLJ14Softcore\n" ); fflush( stderr );
GpuLJ14Softcore* gpuLJ14Softcore = getGpuLJ14Softcore( );
if( gpuLJ14Softcore ){
gpuLJ14Softcore->flipStrides( gpu );
if( log ){
(void) fprintf( log, "CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces flipping LJ14\n" );
(void) fflush( log );
}
}
#endif
}
// calculate nonbonded ixns here, only if implicit solvent is inactive
if ( !getIncludeGBSA() && !getIncludeGBVI() ) {
//kClearForces(gpu);
kCalculateCDLJSoftcoreForces(gpu);
}
// local LJ-14 forces
kCalculateLocalSoftcoreForces(gpu);
//kReduceForces(gpu);
//exit(0);
}
double CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
bool CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::getIncludeGBSA( void ) const {
return bIncludeGBSA;
}
void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::setIncludeGBSA( bool inputIncludeGBSA ){
bIncludeGBSA = inputIncludeGBSA;
}
bool CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::getIncludeGBVI( void ) const {
return bIncludeGBVI;
}
void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::setIncludeGBVI( bool inputIncludeGBVI ){
bIncludeGBVI = inputIncludeGBVI;
}
bool CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::getIncludeSoftcore( void ) const {
return bIncludeSoftcore;
}
int CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::getNumExceptions( void ) const {
return numExceptions;
}
void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::setIncludeSoftcore( bool inputIncludeSoftcore ){
bIncludeSoftcore = inputIncludeSoftcore;
}
GpuLJ14Softcore* CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::getGpuLJ14Softcore( void ) const {
return gpuLJ14Softcore;
}
void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::setGpuLJ14Softcore( GpuLJ14Softcore* inputGpuLJ14Softcore ){
gpuLJ14Softcore = inputGpuLJ14Softcore;
}
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::~CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel() {
delete gpuObcGbsaSoftcore;
}
void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize(const System& system, const GBSAOBCSoftcoreForce& force) {
// ---------------------------------------------------------------------------------------
//static const std::string methodName = "CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize";
// ---------------------------------------------------------------------------------------
_gpuContext* gpu = data.gpu;
MapStringInt forceMap;
getForceMap( system, forceMap);
// check that nonbonded (non-softcore is not active)
if( forceMap.find( NB_FORCE ) != forceMap.end() ){
throw OpenMMException( "Mixing NonbondedForce and GBSAOBCSoftoreForce not allowed -- use NonbondedSoftcoreForce " );
}
int numParticles = system.getNumParticles();
std::vector<float> radius(numParticles);
std::vector<float> scale(numParticles);
std::vector<float> charge(numParticles);
std::vector<float> nonPolarScalingFactors(numParticles);
for (int i = 0; i < numParticles; i++) {
double particleCharge, particleRadius, scalingFactor, particleNonPolarScalingFactor;
force.getParticleParameters(i, particleCharge, particleRadius, scalingFactor, particleNonPolarScalingFactor);
radius[i] = static_cast<float>( particleRadius);
scale[i] = static_cast<float>( scalingFactor);
charge[i] = static_cast<float>( particleCharge);
nonPolarScalingFactors[i] = static_cast<float>( particleNonPolarScalingFactor);
}
gpuObcGbsaSoftcore = gpuSetObcSoftcoreParameters(gpu, static_cast<float>( force.getSoluteDielectric()),
static_cast<float>( force.getSolventDielectric()),
radius, scale, charge, nonPolarScalingFactors );
}
void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& context) {
// ---------------------------------------------------------------------------------------
static const std::string methodName = "CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces";
// ---------------------------------------------------------------------------------------
_gpuContext* gpu = data.gpu;
int debug = 1;
// send address's of arrays, ... to device on first call
// required since force/energy buffers not set when CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize() was called
if( setSim == 0 ){
setSim++;
SetCalculateObcGbsaSoftcoreBornSumSim( gpu );
SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpu );
//SetCalculateObcGbsaForces2Sim( gpu );
SetCalculateObcGbsaSoftcoreForces2Sim( gpu );
}
// required!!
gpu->bRecalculateBornRadii = true;
// calculate Born radii and first loop of Obc forces
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling kCalculateCDLJObcGbsaSoftcoreForces1\n", methodName.c_str() );
(void) fflush( stderr );
}
kClearBornForces(gpu);
kCalculateObcGbsaSoftcoreBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
kCalculateCDLJObcGbsaSoftcoreForces1(gpu);
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling kReduceObcGbsaBornForces\n", methodName.c_str() );
(void) fflush( stderr );
}
// compute Born forces
gpu->bIncludeGBSA = true;
kReduceObcGbsaBornForces(gpu);
gpu->bIncludeGBSA = false;
if( debug && log ){
(void) fprintf( stderr, "\n%s calling kCalculateObcGbsaForces2\n", methodName.c_str() );
(void) fflush( stderr );
}
// second loop of Obc GBSA forces
//kCalculateObcGbsaForces2(gpu);
kCalculateObcGbsaSoftcoreForces2(gpu);
}
double CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeEnergy(ContextImpl& context) {
executeForces( context );
return 0.0;
}
CudaFreeEnergyCalcGBVISoftcoreForceKernel::~CudaFreeEnergyCalcGBVISoftcoreForceKernel() {
if( log ){
(void) fprintf( log, "CudaFreeEnergyCalcGBVISoftcoreForceKernel destructor called -- freeing gpuGBVISoftcore.\n" );
(void) fflush( log );
}
delete gpuGBVISoftcore;
}
void CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize(const System& system, const GBVISoftcoreForce& force, const std::vector<double> & inputScaledRadii) {
// ---------------------------------------------------------------------------------------
//static const std::string methodName = "CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize";
// ---------------------------------------------------------------------------------------
int numParticles = system.getNumParticles();
_gpuContext* gpu = data.gpu;
// check forces and relevant parameters
MapStringInt forceMap;
getForceMap( system, forceMap);
// check that nonbonded (non-softcore is not active)
if( forceMap.find( NB_FORCE ) != forceMap.end() ){
throw OpenMMException( "Mixing NonbondedForce and GBVISoftoreForce not allowed -- use NonbondedSoftcoreForce " );
}
std::vector<int> particle(numParticles);
std::vector<float> radius(numParticles);
std::vector<float> scaledRadii(numParticles);
std::vector<float> gammas(numParticles);
std::vector<float> bornRadiusScaleFactors(numParticles);
for (int i = 0; i < numParticles; i++) {
double charge, particleRadius, gamma, bornRadiusScaleFactor;
force.getParticleParameters(i, charge, particleRadius, gamma, bornRadiusScaleFactor);
particle[i] = i;
radius[i] = static_cast<float>( particleRadius );
gammas[i] = static_cast<float>( gamma );
scaledRadii[i] = static_cast<float>( inputScaledRadii[i] );
bornRadiusScaleFactors[i] = static_cast<float>( bornRadiusScaleFactor );
}
// tanh not implemented
// std::vector<float> tanhScaleFactors;
std::vector<float> quinticSplineParameters;
if( force.getBornRadiusScalingMethod() == GBVISoftcoreForce::Tanh ){
/*
double alpha, beta, gamma;
force.getTanhParameters( alpha, beta, gamma );
tanhScaleFactors.resize( 3 );
tanhScaleFactors[0] = static_cast<float>(alpha);
tanhScaleFactors[1] = static_cast<float>(beta);
tanhScaleFactors[2] = static_cast<float>(gamma);
*/
} else if( force.getBornRadiusScalingMethod() == GBVISoftcoreForce::QuinticSpline ){
// quintic spline
quinticSplineParameters.resize(2);
quinticSplineParameters[0] = static_cast<float>(force.getQuinticLowerLimitFactor());
quinticSplineParameters[1] = static_cast<float>(force.getQuinticUpperBornRadiusLimit());
quinticSplineParameters[1] = powf( quinticSplineParameters[1], -3.0f );
setQuinticScaling( 1 );
}
// load parameters onto board
// defined in kCalculateGBVISoftcore.cu
gpuGBVISoftcore = gpuSetGBVISoftcoreParameters(gpu, static_cast<float>( force.getSoluteDielectric() ), static_cast<float>( force.getSolventDielectric() ),
particle, radius, gammas, scaledRadii, bornRadiusScaleFactors, quinticSplineParameters);
}
void CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeForces(ContextImpl& context) {
// ---------------------------------------------------------------------------------------
static const std::string methodName = "CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeForces";
// ---------------------------------------------------------------------------------------
_gpuContext* gpu = data.gpu;
int debug = 1;
// send address's of arrays, ... to device on first call
// required since force/energy buffers not set when CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize() was called
if( setSim == 0 ){
setSim++;
SetCalculateGBVISoftcoreBornSumGpuSim( gpu );
SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpu );
SetCalculateGBVIForces2Sim( gpu );
}
// required!!
gpu->bRecalculateBornRadii = true; // fixed
//kClearForces(gpu);
// calculate Born radii and first loop of GB/VI forces
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling kCalculateCDLJObcGbsaSoftcoreForces1 & %s\n", methodName.c_str(),
getQuinticScaling() ? "kReduceGBVIBornSumQuinticScaling" : "kReduceGBVIBornSum" );
(void) fflush( stderr );
}
kClearBornForces(gpu);
kCalculateGBVISoftcoreBornSum(gpu);
if( getQuinticScaling() ){
//(void) fprintf( stderr, "\n%s: calling kReduceGBVIBornSumQuinticScaling\n", methodName.c_str() ); fflush( stderr );
kReduceGBVIBornSumQuinticScaling(gpu, gpuGBVISoftcore );
} else {
kReduceGBVIBornSum(gpu);
}
kCalculateCDLJObcGbsaSoftcoreForces1(gpu);
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling %s\n", methodName.c_str(),
getQuinticScaling() ? "kReduceGBVIBornForcesQuinticScaling" : "kReduceObcGbsaBornForces" );
(void) fflush( stderr );
}
// compute Born forces
if( getQuinticScaling() ){
kReduceGBVIBornForcesQuinticScaling(gpu);
} else {
gpu->bIncludeGBVI = true;
kReduceObcGbsaBornForces(gpu);
gpu->bIncludeGBVI = false;
}
if( debug && log ){
(void) fprintf( stderr, "\n%s: calling kCalculateGBVIForces2\n", methodName.c_str() );
(void) fflush( stderr );
}
// second loop of GB/VI forces
kCalculateGBVIForces2(gpu);
//kReduceForces(gpu);
}
double CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeEnergy(ContextImpl& context) {
executeForces( context );
return 0.0;
}
int CudaFreeEnergyCalcGBVISoftcoreForceKernel::getQuinticScaling( void ) const {
// ---------------------------------------------------------------------------------------
//static const std::string methodName = "CudaFreeEnergyCalcGBVISoftcoreForceKernel::getQuinticScaling";
// ---------------------------------------------------------------------------------------
return quinticScaling;
}
void CudaFreeEnergyCalcGBVISoftcoreForceKernel::setQuinticScaling( int inputQuinticScaling) {
// ---------------------------------------------------------------------------------------
//static const std::string methodName = "CudaFreeEnergyCalcGBVISoftcoreForceKernel::setQuinticScaling";
// ---------------------------------------------------------------------------------------
quinticScaling = inputQuinticScaling;
}
#ifndef OPENMM_FREE_ENERGY_CUDA_KERNELS_H_
#define OPENMM_FREE_ENERGY_CUDA_KERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaPlatform.h"
#include "openmm/kernels.h"
#include "kernels/gputypes.h"
#include "openmm/System.h"
#include "OpenMMFreeEnergy.h"
#include "openmm/freeEnergyKernels.h"
#include "kernels/GpuNonbondedSoftcore.h"
#include "kernels/GpuLJ14Softcore.h"
#include "kernels/GpuObcGbsaSoftcore.h"
#include "kernels/GpuGBVISoftcore.h"
//#define FreeEnergyDebug
namespace OpenMM {
/**
* This kernel is invoked by NonbondedSoftcoreForce to calculate the forces acting on the system.
*/
class CudaFreeEnergyCalcNonbondedSoftcoreForceKernel : public CalcNonbondedSoftcoreForceKernel {
public:
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
CalcNonbondedSoftcoreForceKernel(name, platform), data(data), system(system) {
gpuNonbondedSoftcore = NULL;
gpuLJ14Softcore = NULL;
#ifdef FreeEnergyDebug
log = stderr;
#else
log = NULL;
#endif
setSim = 0;
numExceptions = 0;
numParticles = 0;
bIncludeGBSA = false;
bIncludeGBVI = false;
bIncludeSoftcore = false;
}
~CudaFreeEnergyCalcNonbondedSoftcoreForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the NonbondedForce this kernel will be used for
*/
void initialize(const System& system, const NonbondedSoftcoreForce& force);
/**
* Execute the kernel to calculate the forces.
*
* @param context the context in which to execute this kernel
*/
void executeForces(ContextImpl& context);
/**
* Execute the kernel to calculate the energy.
*
* @param context the context in which to execute this kernel
* @return the potential energy due to the NonbondedForce
*/
double executeEnergy(ContextImpl& context);
/**
* Get flag signalling whether GBSA/OBC force is included
*
* @return flag
*/
bool getIncludeGBSA( void ) const;
/**
* Set flag signalling whether GBSA/OBC force is included
*
* @param inputIncludeGBSA input flag value
*/
void setIncludeGBSA( bool inputIncludeGBSA );
/**
* Get flag signalling whether GB/VI force is included
*
* @return flag
*/
bool getIncludeGBVI( void ) const;
/**
* Set flag signalling whether GB/VI force is included
*
* @param inputIncludeGBVI input flag value
*/
void setIncludeGBVI( bool inputIncludeGBVI );
/**
* Get flag signalling whether softcore force is included
*
* @return flag
*/
bool getIncludeSoftcore( void ) const;
/**
* Set flag signalling whether GB/VI force is included
*
* @param inputIncludeGBVI input flag value
*/
void setIncludeSoftcore( bool inputSoftcore);
/**
* Get number of exceptions
*
* @return number of exceptions
*/
int getNumExceptions( void ) const;
/**
* Get GpuLJ14Softcore
*
* @return GpuLJ14Softcore object
*/
GpuLJ14Softcore* getGpuLJ14Softcore( void ) const;
/**
* Set GpuLJ14Softcore
*
* @param GpuLJ14Softcore object
*/
void setGpuLJ14Softcore( GpuLJ14Softcore* gpuLJ14Softcore );
private:
CudaPlatform::PlatformData& data;
int numParticles;
System& system;
GpuNonbondedSoftcore* gpuNonbondedSoftcore;
GpuLJ14Softcore* gpuLJ14Softcore;
bool bIncludeGBSA;
bool bIncludeGBVI;
bool bIncludeSoftcore;
int numExceptions;
FILE* log;
int setSim;
};
/**
* This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
*/
class CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel : public CalcGBSAOBCSoftcoreForceKernel {
public:
CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) :
CalcGBSAOBCSoftcoreForceKernel(name, platform), data(data) {
#ifdef FreeEnergyDebug
log = stderr;
#else
log = NULL;
#endif
setSim = 0;
gpuObcGbsaSoftcore = NULL;
}
~CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the GBSAOBCForce this kernel will be used for
*/
void initialize(const System& system, const GBSAOBCSoftcoreForce& force);
/**
* Execute the kernel to calculate the forces.
*
* @param context the context in which to execute this kernel
*/
void executeForces(ContextImpl& context);
/**
* Execute the kernel to calculate the energy.
*
* @param context the context in which to execute this kernel
* @return the potential energy due to the GBSAOBCForce
*/
double executeEnergy(ContextImpl& context);
private:
CudaPlatform::PlatformData& data;
FILE* log;
int setSim;
GpuObcGbsaSoftcore* gpuObcGbsaSoftcore;
};
/**
* This kernel is invoked by GBVIForce to calculate the forces acting on the system.
*/
class CudaFreeEnergyCalcGBVISoftcoreForceKernel : public CalcGBVISoftcoreForceKernel {
public:
CudaFreeEnergyCalcGBVISoftcoreForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) :
CalcGBVISoftcoreForceKernel(name, platform), data(data) {
#ifdef FreeEnergyDebug
log = stderr;
#else
log = NULL;
#endif
setSim = 0;
quinticScaling = 0;
gpuGBVISoftcore = NULL;
}
~CudaFreeEnergyCalcGBVISoftcoreForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the GBVIForce this kernel will be used for
* @param scaledRadii the scaled radii (Eq. 5 of Labute paper)
*/
void initialize(const System& system, const GBVISoftcoreForce& force, const std::vector<double> & scaledRadii);
/**
* Execute the kernel to calculate the forces.
*
* @param context the context in which to execute this kernel
*/
void executeForces(ContextImpl& context);
/**
* Execute the kernel to calculate the energy.
*
* @param context the context in which to execute this kernel
* @return the potential energy due to the GBVIForce
*/
double executeEnergy(ContextImpl& context);
/**
* Apply quintic scaling for Born radii
*
* @return nonzero value if scaling is to be applied
*/
int getQuinticScaling(void) const;
/**
* Set flag for quintic scaling for Born radii
*
* @param nonzero value if scaling is to be applied
*/
void setQuinticScaling(int quinticScaling );
private:
CudaPlatform::PlatformData& data;
GpuGBVISoftcore* gpuGBVISoftcore;
FILE* log;
int setSim;
int quinticScaling;
};
} // namespace OpenMM
#endif /*OPENMM_FREE_ENERGY_CUDA_KERNELS_H_*/
#ifndef __GPU_FREE_ENERGY_KERNELS_H__
#define __GPU_FREE_ENERGY_KERNELS_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
#include "cudatypes.h"
#include "GpuNonbondedSoftcore.h"
#include "GpuLJ14Softcore.h"
#include "GpuObcGbsaSoftcore.h"
#include "GpuGBVISoftcore.h"
#include <vector>
// Function prototypes
// CDLJ softcore
// setup methods called from CudaFreeEnergyKernels
// nonbonded and 1-4 ixns
extern "C"
GpuNonbondedSoftcore* gpuSetNonbondedSoftcoreParameters(gpuContext gpu, float epsfac, const std::vector<int>& atom, const std::vector<float>& c6,
const std::vector<float>& c12, const std::vector<float>& q,
const std::vector<float>& softcoreLJLambdaArray, const std::vector<char>& symbol,
const std::vector<std::vector<int> >& exclusions, CudaNonbondedMethod method);
extern "C"
GpuLJ14Softcore* gpuSetLJ14SoftcoreParameters(gpuContext gpu, float epsfac, float fudge, const std::vector<int>& atom1,
const std::vector<int>& atom2, const std::vector<float>& c6, const std::vector<float>& c12,
const std::vector<float>& q1, const std::vector<float>& q2, const std::vector<float>& softcoreLJLambdaArray);
// delete supplemtentary objects, ...
extern "C"
void gpuDeleteNonbondedSoftcoreParameters( void* gpuNonbondedSoftcore);
// write address's to device
extern "C"
void SetCalculateCDLJSoftcoreGpuSim( gpuContext gpu );
extern "C"
void SetCalculateCDLJSoftcoreSupplementarySim( float* gpuParticleSoftCoreLJLambda);
extern "C"
void SetCalculateLocalSoftcoreGpuSim( gpuContext gpu );
// kernel calls to device
extern "C"
void kCalculateCDLJSoftcoreForces(gpuContext gpu );
extern void kCalculateLocalSoftcoreForces( gpuContext gpu );
// GB/VI softcore
// setup method called from CudaFreeEnergyKernels
extern "C"
GpuGBVISoftcore* gpuSetGBVISoftcoreParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<int>& atom, const std::vector<float>& radius,
const std::vector<float>& gamma, const std::vector<float>& scaledRadii,
const std::vector<float>& bornRadiusScaleFactors, const std::vector<float>& quinticSplineParameters);
// delete supplemtentary objects, ...
extern "C"
void gpuDeleteGBVISoftcoreParameters( void* gpuNonbondedSoftcore);
// write address's to device
extern "C"
void SetCalculateGBVISoftcoreForcesSim( gpuContext gpu, float* softCoreLJLambda);
extern "C"
void SetCalculateGBVISoftcoreBornSumGpuSim( gpuContext gpu);
extern "C"
void SetCalculateGBVISoftcoreSupplementarySim( GpuGBVISoftcore* gpuGBVISoftcore );
// kernel calls to device
extern void kReduceGBVIBornSumQuinticScaling( gpuContext gpu, GpuGBVISoftcore* gpuGBVISoftcore );
extern void kCalculateGBVISoftcoreBornSum( gpuContext gpu );
extern void kReduceGBVIBornForcesQuinticScaling( gpuContext gpu );
// Obc softcore
// setup method called from CudaFreeEnergyKernels
/**
* Initialize parameters for Cuda Obc softcore
*
* @param gpu gpu context
* @param innerDielectric solute dielectric
* @param solventDielectric solvent dielectric
* @param radius intrinsic Born radii
* @param scale Obc scaling factors
* @param charge atomic charges (possibly overwritten by other methods?)
* @param nonPolarScalingFactors non-polar scaling factors
*
*/
extern "C"
GpuObcGbsaSoftcore* gpuSetObcSoftcoreParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<float>& radius, const std::vector<float>& scale,
const std::vector<float>& charge, const std::vector<float>& nonPolarScalingFactors);
// delete supplemtentary objects, ...
extern "C"
void gpuDeleteObcSoftcoreParameters( void* gpuNonbondedSoftcore);
// write address's to device
extern "C"
void SetCalculateObcGbsaSoftcoreBornSumSim( gpuContext gpu );
extern "C"
void SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsSim( float* nonPolarScalingFactors );
// this method and kCalculateObcGbsaSoftcoreForces2() are being
// used until changes in OpenMM version are made
extern "C"
void SetCalculateObcGbsaSoftcoreForces2Sim( gpuContext gpu );
// kernel calls to device
extern void kCalculateObcGbsaSoftcoreBornSum( gpuContext gpu );
// this method is not needed; the OpenMM version can be used
extern void kCalculateObcGbsaSoftcoreForces2( gpuContext gpu );
// shared
extern "C"
void SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpuContext gpu );
extern "C"
void SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim( float* gpuParticleSoftCoreLJLambda);
extern void kCalculateCDLJObcGbsaSoftcoreForces1( gpuContext gpu );
#endif //__GPU_FREE_ENERGY_KERNELS_H__
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
using namespace std;
#include "GpuGBVISoftcore.h"
#include "GpuFreeEnergyCudaKernels.h"
// GpuGBVISoftcore constructor
GpuGBVISoftcore::GpuGBVISoftcore( ){
_bornRadiiScalingMethod = 0;
_quinticLowerLimitFactor = 0.8;
_quinticUpperLimit = 0.008;
_psSwitchDerivative = NULL;
}
// GpuGBVISoftcore destructor
GpuGBVISoftcore::~GpuGBVISoftcore( ){
delete _psSwitchDerivative;
}
// set quintic lower limit factor value
int GpuGBVISoftcore::setQuinticLowerLimitFactor( float inputQuinticLowerLimitFactor ){
_quinticLowerLimitFactor = inputQuinticLowerLimitFactor;
return 0;
}
// get quintic lower limit factor value
float GpuGBVISoftcore::getQuinticLowerLimitFactor( void ) const {
return _quinticLowerLimitFactor;
}
// set quintic upper limit value
int GpuGBVISoftcore::setQuinticUpperLimit( float inputQuinticUpperLimit ){
_quinticUpperLimit = inputQuinticUpperLimit;
return 0;
}
// get quintic upper limit value
float GpuGBVISoftcore::getQuinticUpperLimit( void ) const {
return _quinticUpperLimit;
}
// get Born radii scaling method
int GpuGBVISoftcore::getBornRadiiScalingMethod( void ) const {
return _bornRadiiScalingMethod;
}
// set Born radii scaling method
int GpuGBVISoftcore::setBornRadiiScalingMethod( int inputBornRadiiScalingMethod ){
_bornRadiiScalingMethod = inputBornRadiiScalingMethod;
return 0;
}
// get address for SwitchDerivative array on board
float* GpuGBVISoftcore::getGpuSwitchDerivative( void ) const {
return _psSwitchDerivative->_pDevStream[0];
}
// get SwitchDerivative array
CUDAStream<float>* GpuGBVISoftcore::getSwitchDerivative( void ) const {
return _psSwitchDerivative;
}
// initialize SwitchDerivative array
int GpuGBVISoftcore::initializeGpuSwitchDerivative( unsigned int numberOfParticles ){
_psSwitchDerivative = new CUDAStream<float>( numberOfParticles, 1, "SwitchDerivative");
for( unsigned int ii = 0; ii < numberOfParticles; ii++ ){
(*_psSwitchDerivative)[ii] = 1.0f;
}
return 0;
}
// upload SoftCoreLambda array
int GpuGBVISoftcore::upload( gpuContext gpu ){
if( getBornRadiiScalingMethod() > 0 ){
SetCalculateGBVISoftcoreSupplementarySim( this );
}
return 0;
}
#ifndef OPENMM_FREE_ENERGY_GPU_GBVI_SOFTCORE_
#define OPENMM_FREE_ENERGY_GPU_GBVI_SOFTCORE_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
#include "cudatypes.h"
#include "cudaKernels.h"
#include "openmm/OpenMMException.h"
// info related to nonbonded softcore
class GpuGBVISoftcore {
public:
/**
* This is an enumeration of the different methods that may be used for scaling of the Born radii.
*/
/**
* No scaling method is applied.
*/
static const int NoScaling = 0;
/**
* Use the method outlined in Proteins 55, 383-394 (2004), Eq. 6
*/
static const int Tanh = 1;
/**
* Use quintic spline scaling function
*/
static const int QuinticSpline = 2;
GpuGBVISoftcore();
~GpuGBVISoftcore();
/**
* Set softcore value
*/
int setSoftCoreLambda( float softCoreLambda );
/**
* Get softcore value
*/
float getSoftCoreLambda( void ) const;
/**
* Set quintic lower limit factor value
*/
int setQuinticLowerLimitFactor( float quinticLowerLimitFactor );
/**
* Get quintic lower limit factor value
*/
float getQuinticLowerLimitFactor( void ) const;
/**
* Set quintic upper limit value
*/
int setQuinticUpperLimit( float quinticUpperLimit );
/**
* Get quintic upper limit value
*/
float getQuinticUpperLimit( void ) const;
/**
* Get Born radii scaling method
*/
int getBornRadiiScalingMethod( void ) const;
/**
* Set Born radii scaling method
*/
int setBornRadiiScalingMethod( int bornRadiiScalingMethod );
// initialize SoftCoreLJLambda particle array
int initializeGpuSwitchDerivative( unsigned int numberOfParticles );
/**
* Get address for switch derivative array
*
* @return address
*/
float* getGpuSwitchDerivative( void ) const;
/**
* Get switch derivative array
*
* @return address
*/
CUDAStream<float>* getSwitchDerivative( void ) const;
/**
* Upload data
*
* @return 0 always
*/
int upload( gpuContext gpu );
private:
float _quinticLowerLimitFactor;
float _quinticUpperLimit;
unsigned int _bornRadiiScalingMethod;
CUDAStream<float>* _psSwitchDerivative;
};
#endif // OPENMM_FREE_ENERGY_GPU_GBVI_SOFTCORE_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "GpuLJ14Softcore.h"
#include "GpuFreeEnergyCudaKernels.h"
// GpuLJ14Softcore constructor
GpuLJ14Softcore::GpuLJ14Softcore( ){
psLJ14SoftcoreID = NULL;
psLJ14SoftcoreParameter = NULL;
}
// GpuLJ14Softcore destructor
GpuLJ14Softcore::~GpuLJ14Softcore( ){
delete psLJ14SoftcoreID;
delete psLJ14SoftcoreParameter;
}
int GpuLJ14Softcore::flipStrides( gpuContext gpu ){
int flip = gpu->sim.outputBuffers - 1;
for (int ii = 0; ii < psLJ14SoftcoreID->_stride; ii++)
{
(*psLJ14SoftcoreID)[ii].z = flip - (*psLJ14SoftcoreID)[ii].z;
(*psLJ14SoftcoreID)[ii].w = flip - (*psLJ14SoftcoreID)[ii].w;
}
psLJ14SoftcoreID->Upload();
return 0;
}
#ifndef OPENMM_FREE_ENERGY_GPU_LJ14_SOFTCORE_
#define OPENMM_FREE_ENERGY_GPU_LJ14_SOFTCORE_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
struct cudaFreeEnergySimulationNonbonded14 {
unsigned int LJ14s; // Number of Lennard Jones 1-4 interactions
unsigned int LJ14_offset; // Offset to end of Lennard Jones 1-4 parameters
CudaNonbondedMethod nonbondedMethod; // How to handle nonbonded interactions
int4* pLJ14ID; // Lennard Jones 1-4 atom and output buffer IDs
float4* pLJ14Parameter; // Lennard Jones 1-4 parameters
};
// info related to nonbonded 1-4 softcore
class GpuLJ14Softcore {
public:
GpuLJ14Softcore();
~GpuLJ14Softcore();
CUDAStream<int4>* psLJ14SoftcoreID;
CUDAStream<float4>* psLJ14SoftcoreParameter;
cudaFreeEnergySimulationNonbonded14 feSim;
int flipStrides(gpuContext gpu);
private:
};
#endif // OPENMM_FREE_ENERGY_GPU_LJ14_SOFTCORE_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
using namespace std;
#include "GpuNonbondedSoftcore.h"
#include "GpuFreeEnergyCudaKernels.h"
// GpuNonbondedSoftcore constructor
GpuNonbondedSoftcore::GpuNonbondedSoftcore( ){
_softcoreLJLambda = 1.0f;
_psSoftcoreLJLambda = NULL;
}
GpuNonbondedSoftcore::~GpuNonbondedSoftcore( ){
delete _psSoftcoreLJLambda;
}
// set global softCoreLJLambda
int GpuNonbondedSoftcore::setSoftCoreLJLambda( float softCoreLJLambda ){
_softcoreLJLambda = softCoreLJLambda;
return 0;
}
// get global softCoreLJLambda
float GpuNonbondedSoftcore::getSoftCoreLJLambda( void ) const {
return _softcoreLJLambda;
}
// initialize SoftCoreLJLambda particle array
int GpuNonbondedSoftcore::initializeParticleSoftCoreLJLambda( unsigned int numberOfParticles ){
_psSoftcoreLJLambda = new CUDAStream<float>( numberOfParticles, 1, "SoftcoreLJLambda");
for( unsigned int ii = 0; ii < numberOfParticles; ii++ ){
(*_psSoftcoreLJLambda)[ii] = 1.0f;
}
return 0;
}
// set entry in SoftCoreLJLambda particle array
int GpuNonbondedSoftcore::setParticleSoftCoreLJLambda( unsigned int particleIndex, float softCoreLJLambda ){
(*_psSoftcoreLJLambda)[particleIndex] = softCoreLJLambda;
return 0;
}
// upload SoftCoreLJLambda array
int GpuNonbondedSoftcore::upload( gpuContext gpu ){
_psSoftcoreLJLambda->Upload();
SetCalculateCDLJSoftcoreSupplementarySim( getGpuParticleSoftCoreLJLambda() );
SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim( getGpuParticleSoftCoreLJLambda() );
return 0;
}
// get address for SoftCoreLJLambda particle array on board
float* GpuNonbondedSoftcore::getGpuParticleSoftCoreLJLambda( void ) const {
return _psSoftcoreLJLambda->_pDevStream[0];
}
#ifndef OPENMM_FREE_ENERGY_GPU_NONBONDED_SOFTCORE_
#define OPENMM_FREE_ENERGY_GPU_NONBONDED_SOFTCORE_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
// info related to nonbonded softcore
class GpuNonbondedSoftcore {
public:
GpuNonbondedSoftcore();
~GpuNonbondedSoftcore();
/**
* Set softcore value
*/
int setSoftCoreLJLambda( float softCoreLJLambda );
/**
* Get softcore value
*/
float getSoftCoreLJLambda( void ) const;
/**
* Initialize ParticleSoftCoreLJLambda array
*
* @param numberOfParticles number of particles
*
* @return 0 always
*/
int initializeParticleSoftCoreLJLambda( unsigned int numberOfParticles );
/**
* Upload data
*
* @param implicitSolvent set if implicit solvent is included in system
*
* @return 0 always
*/
int upload( gpuContext gpu );
/**
* Set particle softCoreLJLambda entry
*
* @param particleIndex index of particle
* @param softCoreLJLambda softCoreLJLambda value
*
* @return 0 always
*/
int setParticleSoftCoreLJLambda( unsigned int particleIndex, float softCoreLJLambda );
/**
* Get address for SoftCoreLJLambda particle array on board
*
* @return address
*/
float* getGpuParticleSoftCoreLJLambda( void ) const;
private:
float _softcoreLJLambda;
CUDAStream<float>* _psSoftcoreLJLambda;
};
#endif // OPENMM_FREE_ENERGY_GPU_NONBONDED_SOFTCORE_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
using namespace std;
#include "GpuObcGbsaSoftcore.h"
#include "GpuFreeEnergyCudaKernels.h"
// GpuObcGbsaSoftcore constructor
GpuObcGbsaSoftcore::GpuObcGbsaSoftcore( ){
_psNonPolarScalingFactors = NULL;
}
GpuObcGbsaSoftcore::~GpuObcGbsaSoftcore( ){
delete _psNonPolarScalingFactors;
}
// initialize NonPolarScalingFactors array
int GpuObcGbsaSoftcore::initializeNonPolarScalingFactors( unsigned int numberOfParticles ){
_psNonPolarScalingFactors = new CUDAStream<float>( numberOfParticles, 1, "ObcSoftcoreNonPolarScalingFactors");
for( unsigned int ii = 0; ii < numberOfParticles; ii++ ){
(*_psNonPolarScalingFactors)[ii] = 1.0f;
}
return 0;
}
// set entry in NonPolarScalingFactors array
int GpuObcGbsaSoftcore::setNonPolarScalingFactors( unsigned int particleIndex, float nonPolarScalingFactor ){
(*_psNonPolarScalingFactors)[particleIndex] = nonPolarScalingFactor;
return 0;
}
// upload NonPolarScalingFactors array
int GpuObcGbsaSoftcore::upload( gpuContext gpu ){
_psNonPolarScalingFactors->Upload();
SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsSim( getGpuNonPolarScalingFactors() );
return 0;
}
// get address for NonPolarScalingFactors array on board
float* GpuObcGbsaSoftcore::getGpuNonPolarScalingFactors( void ) const {
return _psNonPolarScalingFactors->_pDevStream[0];
}
#ifndef OPENMM_FREE_ENERGY_GPU_OBC_GBSA_SOFTCORE_
#define OPENMM_FREE_ENERGY_GPU_OBC_GBSA_SOFTCORE_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
// info related to nonbonded softcore
class GpuObcGbsaSoftcore {
public:
GpuObcGbsaSoftcore();
~GpuObcGbsaSoftcore();
/**
* Initialize NonPolarScalingFactors array
*
* @param numberOfParticles number of particles
*
* @return 0 always
*/
int initializeNonPolarScalingFactors( unsigned int numberOfParticles );
/**
* Upload data
*
* @param implicitSolvent set if implicit solvent is included in system
*
* @return 0 always
*/
int upload( gpuContext gpu );
/**
* Set nonPolarScalingFactor entry
*
* @param particleIndex index of particle
* @param nonPolarScalingFactor nonPolarScalingFactor value
*
* @return 0 always
*/
int setNonPolarScalingFactors( unsigned int particleIndex, float nonPolarScalingFactor );
/**
* Get address for NonPolarScalingFactors array on board
*
* @return address
*/
float* getGpuNonPolarScalingFactors( void ) const;
private:
CUDAStream<float>* _psNonPolarScalingFactors;
};
#endif // OPENMM_FREE_ENERGY_GPU_OBC_GBSA_SOFTCORE_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "gputypes.h"
#include "cudatypes.h"
#include "cudaKernels.h"
#include "GpuFreeEnergyCudaKernels.h"
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
struct Atom {
float x;
float y;
float z;
float q;
float sig;
float eps;
float br;
float softCoreLJLambda;
float fx;
float fy;
float fz;
float fb;
};
struct cudaFreeEnergySimulation {
float* pParticleSoftCoreLJLambda;
};
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaFreeEnergySimulation feSimDev;
void SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpuContext gpu )
{
cudaError_t status;
(void) fprintf( stderr, "SetCalculateCDLJObcGbsaSoftcoreGpu1Sim gpu=%p cSim=%p sizeof=%u\n", gpu, &gpu->sim, sizeof(cudaGmxSimulation) ); fflush( stderr );
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetCalculateCDLJObcGbsaSoftcoreGpu1Sim copy to cSim failed");
}
void SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim( float* gpuParticleSoftCoreLJLambda)
{
cudaError_t status;
(void) fprintf( stderr, "SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim\n" );
struct cudaFreeEnergySimulation feSim;
feSim.pParticleSoftCoreLJLambda = gpuParticleSoftCoreLJLambda;
status = cudaMemcpyToSymbol(feSimDev, &feSim, sizeof(cudaFreeEnergySimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim failed");
}
void GetCalculateCDLJObcGbsaSoftcoreForces1Sim( gpuContext gpu )
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
texture<float, 1, cudaReadModeElementType> tabulatedErfcRef;
__device__ float fastErfc(float r)
{
float normalized = cSim.tabulatedErfcScale*r;
int index = (int) normalized;
float fract2 = normalized-index;
float fract1 = 1.0f-fract2;
return fract1*tex1Dfetch(tabulatedErfcRef, index) + fract2*tex1Dfetch(tabulatedErfcRef, index+1);
}
// Include versions of the kernel for N^2 calculations.
#if 0
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateCDLJObcGbsaForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
#endif
// Include versions of the kernel for N^2 calculations with softcore LJ.
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2##b
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_SOFTCORE_LJ
#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
#undef USE_SOFTCORE_LJ
// Include versions of the kernel with cutoffs.
#if 0
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateCDLJObcGbsaForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
// Include versions of the kernel with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateCDLJObcGbsaForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
// Include versions of the kernels for Ewald
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define USE_EWALD
#define METHOD_NAME(a, b) a##Ewald##b
#include "kCalculateCDLJObcGbsaForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##EwaldByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
extern __global__ void kFindBlockBoundsCutoff_kernel();
extern __global__ void kFindBlockBoundsPeriodic_kernel();
extern __global__ void kFindBlocksWithInteractionsCutoff_kernel();
extern __global__ void kFindBlocksWithInteractionsPeriodic_kernel();
extern __global__ void kFindInteractionsWithinBlocksCutoff_kernel(unsigned int*);
extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
extern __global__ void kCalculateEwaldFastCosSinSums_kernel();
extern __global__ void kCalculateEwaldFastForces_kernel();
extern void kCalculatePME(gpuContext gpu);
#endif
/**
*
* Calculate Born radii and first GBSA loop forces/energy
*
* @param gpu gpu contexct
* @param gbsaObc if set, calculate Born radii for OBC
* otherwise calculate Born radii for GB/VI
*
*/
void kCalculateCDLJObcGbsaSoftcoreForces1(gpuContext gpu )
{
// printf("kCalculateCDLJObcGbsaForces1\n");
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
// use softcore LJ potential
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
else
kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaN2Forces1");
break;
#if 0
case CUTOFF:
kFindBlockBoundsCutoff_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsCutoff");
kFindBlocksWithInteractionsCutoff_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsCutoff");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaCutoffForces1");
break;
case PERIODIC:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaPeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaPeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaPeriodicForces1");
break;
case EWALD:
case PARTICLE_MESH_EWALD:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaBindTexture(NULL, &tabulatedErfcRef, gpu->psTabulatedErfc->_pDevData, &channelDesc, gpu->psTabulatedErfc->_length*sizeof(float));
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaEwaldForces");
if (gpu->sim.nonbondedMethod == EWALD)
{
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastForces");
}
else
kCalculatePME(gpu);
#endif
}
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for evalauating nonbonded forces and the first stage of GBSA.
* It is included several times in kCalculateCDLJObcGbsaForces1.cu with different #defines to generate
* different versions of the kernels.
*/
#ifdef USE_SOFTCORE_LJ
#include "kSoftcoreLJ.h"
#endif
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
#define COULOMB_ON
__global__ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* workUnit)
{
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
float CDLJObcGbsa_energy;
float energy = 0.0f;
#ifdef USE_CUTOFF
float* tempBuffer = (float*) &sA[cSim.nonbond_threads_per_block];
#endif
#ifdef USE_EWALD
const float TWO_OVER_SQRT_PI = 2.0f/sqrt(LOCAL_HACK_PI);
#endif
unsigned int lasty = -0xFFFFFFFF;
while (pos < end)
{
// Extract cell coordinates from appropriate work unit
unsigned int x = workUnit[pos];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
bool bExclusionFlag = (x & 0x1);
x = (x >> 17) << GRIDBITS;
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int i = x + tgx;
float4 apos = cSim.pPosq[i];
float2 a = cSim.pAttr[i];
float softCoreLJLambda = feSimDev.pParticleSoftCoreLJLambda[i];
float br = cSim.pBornRadii[i];
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
Atom* psA = &sA[tbx];
float4 af;
af.x = 0.0f;
af.y = 0.0f;
af.z = 0.0f;
af.w = 0.0f;
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA[threadIdx.x].x = apos.x;
sA[threadIdx.x].y = apos.y;
sA[threadIdx.x].z = apos.z;
sA[threadIdx.x].q = apos.w;
float q2 = cSim.preFactor * apos.w;
apos.w *= cSim.epsfac;
sA[threadIdx.x].sig = a.x;
sA[threadIdx.x].eps = a.y;
sA[threadIdx.x].br = br;
sA[threadIdx.x].softCoreLJLambda = softCoreLJLambda;
if (!bExclusionFlag)
{
for (unsigned int j = 0; j < GRID; j++)
{
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2);
float sig = a.x + psA[j].sig;
float eps = a.y * psA[j].eps;
#ifdef USE_SOFTCORE_LJ
float dEdR = getSoftCoreLJ( r2, sig, eps, softCoreLJLambda, psA[j].softCoreLJLambda, &CDLJObcGbsa_energy );
#else
// CDLJ part
float sig2 = invR * sig;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#endif
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
float erfcAlphaR = fastErfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
#ifdef COULOMB_ON
float factorX = apos.w * psA[j].q * invR;
dEdR += factorX;
CDLJObcGbsa_energy += factorX;
#endif
#endif
dEdR *= invR * invR;
// ObcGbsaForce1 part
float alpha2_ij = br * psA[j].br;
float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2);
float Gpol = (q2 * psA[j].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
af.w += dGpol_dalpha2_ij * psA[j].br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
/* E */
CDLJObcGbsa_energy += (q2 * psA[j].q) / denominator;
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
/* E */
CDLJObcGbsa_energy = 0.0f;
}
#endif
/* E */
if (i < cSim.atoms)
{
energy += 0.5f*CDLJObcGbsa_energy;
}
// Add Forces
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
}
}
else // bExclusion
{
unsigned int xi = x>>GRIDBITS;
unsigned int cell = xi+xi*cSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
unsigned int excl = cSim.pExclusion[cSim.pExclusionIndex[cell]+tgx];
for (unsigned int j = 0; j < GRID; j++)
{
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2);
float sig = a.x + psA[j].sig;
float eps = a.y * psA[j].eps;
#ifdef USE_SOFTCORE_LJ
float dEdR = getSoftCoreLJ( r2, sig, eps, softCoreLJLambda, psA[j].softCoreLJLambda, &CDLJObcGbsa_energy );
#else
// CDLJ part
float sig2 = invR * sig;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#endif
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
float erfcAlphaR = fastErfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+j && x+tgx < cSim.atoms && y+j < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[j].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy = -apos.w * psA[j].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
#ifdef COULOMB_ON
float factorX = apos.w * psA[j].q * invR;
dEdR += factorX;
CDLJObcGbsa_energy += factorX;
#endif
#endif
dEdR *= invR * invR;
#ifdef USE_EWALD
if (!(excl & 0x1) && !needCorrection)
#else
if (!(excl & 0x1))
#endif
{
dEdR = 0.0f;
CDLJObcGbsa_energy = 0.0f;
}
// ObcGbsaForce1 part
float alpha2_ij = br * psA[j].br;
float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2);
float Gpol = (q2 * psA[j].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
af.w += dGpol_dalpha2_ij * psA[j].br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
/* E */
CDLJObcGbsa_energy += (q2 * psA[j].q) / denominator;
#if defined USE_PERIODIC
if (i >= cSim.atoms || x+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
CDLJObcGbsa_energy = 0.0f;
}
#elif defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
CDLJObcGbsa_energy = 0.0f;
}
#endif
if (i < cSim.atoms)
{
energy += 0.5f*CDLJObcGbsa_energy;
}
// Add Forces
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
excl >>= 1;
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
#else
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
float4 of = cSim.pForce4a[offset];
float bf = cSim.pBornForce[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
bf += af.w;
cSim.pForce4a[offset] = of;
cSim.pBornForce[offset] = bf;
}
else // 100% utilization
{
// Read fixed atom data into registers and GRF
if (lasty != y)
{
unsigned int j = y + tgx;
float4 temp = cSim.pPosq[j];
float2 temp1 = cSim.pAttr[j];
float temp2 = feSimDev.pParticleSoftCoreLJLambda[j];
//float temp2 = 1.0f;
sA[threadIdx.x].br = cSim.pBornRadii[j];
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].q = temp.w;
sA[threadIdx.x].sig = temp1.x;
sA[threadIdx.x].eps = temp1.y;
sA[threadIdx.x].softCoreLJLambda = temp2;
}
sA[threadIdx.x].fx = 0.0f;
sA[threadIdx.x].fy = 0.0f;
sA[threadIdx.x].fz = 0.0f;
sA[threadIdx.x].fb = 0.0f;
float q2 = apos.w * cSim.preFactor;
apos.w *= cSim.epsfac;
if (!bExclusionFlag)
{
#ifdef USE_CUTOFF
unsigned int flags = cSim.pInteractionFlag[pos];
if (flags == 0)
{
// No interactions in this block.
}
else if (flags == 0xFFFFFFFF)
#endif
{
// Compute all interactions within this block.
for (unsigned int j = 0; j < GRID; j++)
{
float dx = psA[tj].x - apos.x;
float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2);
float sig = a.x + psA[tj].sig;
float eps = a.y * psA[tj].eps;
#ifdef USE_SOFTCORE_LJ
float dEdR = getSoftCoreLJ( r2, sig, eps, softCoreLJLambda, psA[tj].softCoreLJLambda, &CDLJObcGbsa_energy );
#else
// CDLJ part
float sig2 = invR * sig;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#endif
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
float erfcAlphaR = fastErfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
#ifdef COULOMB_ON
float factorX = apos.w * psA[tj].q * invR;
dEdR += factorX;
CDLJObcGbsa_energy += factorX;
#endif
#endif
dEdR *= invR * invR;
// ObcGbsaForce1 part
float alpha2_ij = br * psA[tj].br;
float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2);
float Gpol = (q2 * psA[tj].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
af.w += dGpol_dalpha2_ij * psA[tj].br;
psA[tj].fb += dGpol_dalpha2_ij * br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
/* E */
CDLJObcGbsa_energy += (q2 * psA[tj].q) / denominator;
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
CDLJObcGbsa_energy = 0.0f;
}
#endif
if (i < cSim.atoms)
{
energy += CDLJObcGbsa_energy;
}
// Add forces
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
tj = (tj + 1) & (GRID - 1);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for (unsigned int j = 0; j < GRID; j++)
{
if ((flags&(1<<j)) != 0)
{
float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2);
float sig = a.x + psA[j].sig;
float eps = a.y * psA[j].eps;
#ifdef USE_SOFTCORE_LJ
float dEdR = getSoftCoreLJ( r2, sig, eps, softCoreLJLambda, psA[j].softCoreLJLambda, &CDLJObcGbsa_energy );
#else
// CDLJ part
float sig2 = invR * sig;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#endif
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
float erfcAlphaR = fastErfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
#ifdef COULOMB_ON
float factorX = apos.w * psA[j].q * invR;
dEdR += factorX;
CDLJObcGbsa_energy += factorX;
#endif
#endif
dEdR *= invR * invR;
// ObcGbsaForce1 part
float alpha2_ij = br * psA[j].br;
float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2);
float Gpol = (q2 * psA[j].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
af.w += dGpol_dalpha2_ij * psA[j].br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
/* E */
CDLJObcGbsa_energy += (q2 * psA[j].q) / denominator;
// Sum the Born forces.
tempBuffer[threadIdx.x] = dGpol_dalpha2_ij * br;
if (tgx % 2 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+1];
if (tgx % 4 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+2];
if (tgx % 8 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+4];
if (tgx % 16 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+8];
if (tgx == 0)
psA[j].fb += tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
#ifdef USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
CDLJObcGbsa_energy = 0.0f;
}
#endif
if (i < cSim.atoms)
{
energy += CDLJObcGbsa_energy;
}
// Add forces
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
tempBuffer[threadIdx.x] = dx;
if (tgx % 2 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+1];
if (tgx % 4 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+2];
if (tgx % 8 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+4];
if (tgx % 16 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+8];
if (tgx == 0)
psA[j].fx += tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
tempBuffer[threadIdx.x] = dy;
if (tgx % 2 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+1];
if (tgx % 4 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+2];
if (tgx % 8 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+4];
if (tgx % 16 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+8];
if (tgx == 0)
psA[j].fy += tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
tempBuffer[threadIdx.x] = dz;
if (tgx % 2 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+1];
if (tgx % 4 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+2];
if (tgx % 8 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+4];
if (tgx % 16 == 0)
tempBuffer[threadIdx.x] += tempBuffer[threadIdx.x+8];
if (tgx == 0)
psA[j].fz += tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
}
}
}
#endif
}
else // bExclusion
{
unsigned int xi = x>>GRIDBITS;
unsigned int yi = y>>GRIDBITS;
unsigned int cell = xi+yi*cSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
unsigned int excl = cSim.pExclusion[cSim.pExclusionIndex[cell]+tgx];
excl = (excl >> tgx) | (excl << (GRID - tgx));
for (unsigned int j = 0; j < GRID; j++)
{
float dx = psA[tj].x - apos.x;
float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float invR = 1.0f / sqrt(r2);
float sig = a.x + psA[tj].sig;
float eps = a.y * psA[tj].eps;
#ifdef USE_SOFTCORE_LJ
float dEdR = getSoftCoreLJ( r2, sig, eps, softCoreLJLambda, psA[tj].softCoreLJLambda, &CDLJObcGbsa_energy );
#else
// CDLJ part
float sig2 = invR * sig;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
/* E */
CDLJObcGbsa_energy = eps * (sig6 - 1.0f) * sig6;
#endif
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
float erfcAlphaR = fastErfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+tj && x+tgx < cSim.atoms && y+tj < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[tj].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy = -apos.w * psA[tj].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#endif
#else
#ifdef COULOMB_ON
float factorX = apos.w * psA[tj].q * invR;
dEdR += factorX;
CDLJObcGbsa_energy += factorX;
#endif
#endif
dEdR *= invR * invR;
#ifdef USE_EWALD
if (!(excl & 0x1) && !needCorrection)
#else
if (!(excl & 0x1))
#endif
{
dEdR = 0.0f;
CDLJObcGbsa_energy = 0.0f;
}
// ObcGbsaForce1 part
float alpha2_ij = br * psA[tj].br;
float D_ij = r2 / (4.0f * alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij * expTerm;
float denominator = sqrt(denominator2);
float Gpol = (q2 * psA[tj].q) / (denominator * denominator2);
float dGpol_dalpha2_ij = -0.5f * Gpol * expTerm * (1.0f + D_ij);
af.w += dGpol_dalpha2_ij * psA[tj].br;
psA[tj].fb += dGpol_dalpha2_ij * br;
dEdR += Gpol * (1.0f - 0.25f * expTerm);
CDLJObcGbsa_energy += (q2 * psA[tj].q) / denominator;
#if defined USE_PERIODIC
if (i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
CDLJObcGbsa_energy = 0.0f;
}
#elif defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dEdR = 0.0f;
CDLJObcGbsa_energy = 0.0f;
}
#endif
if (i < cSim.atoms)
{
energy += CDLJObcGbsa_energy;
}
// Add forces
dx *= dEdR;
dy *= dEdR;
dz *= dEdR;
af.x -= dx;
af.y -= dy;
af.z -= dz;
psA[tj].fx += dx;
psA[tj].fy += dy;
psA[tj].fz += dz;
excl >>= 1;
tj = (tj + 1) & (GRID - 1);
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
#endif
float4 of = cSim.pForce4a[offset];
float bf = cSim.pBornForce[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
bf += af.w;
cSim.pForce4a[offset] = of;
cSim.pBornForce[offset] = bf;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
offset = y + tgx + warp*cSim.stride;
#else
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
#endif
of = cSim.pForce4a[offset];
bf = cSim.pBornForce[offset];
of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy;
of.z += sA[threadIdx.x].fz;
bf += sA[threadIdx.x].fb;
cSim.pForce4a[offset] = of;
cSim.pBornForce[offset] = bf;
#if 0
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*cSim.stride;
float4 of = cSim.pForce4a[offset];
of.x += af.x;
of.y += af.y;
of.z += af.z;
of.w += af.w;
cSim.pForce4a[offset] = of;
cSim.pBornForce[offset] = af.w;
offset = y + tgx + warp*cSim.stride;
of = cSim.pForce4a[offset];
of.x += sA[threadIdx.x].fx;
of.y += sA[threadIdx.x].fy;
of.z += sA[threadIdx.x].fz;
of.w += sA[threadIdx.x].fb;
cSim.pForce4a[offset] = of;
cSim.pBornForce[offset] = af.w;
#else
unsigned int offset = x + tgx + (y >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
af.x = sA[threadIdx.x].fx;
af.y = sA[threadIdx.x].fy;
af.z = sA[threadIdx.x].fz;
af.w = sA[threadIdx.x].fb;
offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pForce4a[offset] = af;
cSim.pBornForce[offset] = af.w;
#endif
#endif
lasty = y;
}
pos++;
}
cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "GpuGBVISoftcore.h"
#include "GpuFreeEnergyCudaKernels.h"
struct cudaFreeEnergySimulationGBVI {
float quinticLowerLimitFactor;
float quinticUpperLimit;
float* pSwitchDerivative;
};
struct cudaFreeEnergySimulationGBVI gbviSim;
static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaFreeEnergySimulationGBVI gbviSimDev;
void SetCalculateGBVISoftcoreBornSumGpuSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetCalculateGBVISoftcoreBornSumGpuSim copy to cSim failed");
(void) fprintf( stderr, "SetCalculateGBVISoftcoreBornSumGpuSim\n" );
}
void GetCalculateGBVISoftcoreBornSumSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
void SetCalculateGBVISoftcoreSupplementarySim( GpuGBVISoftcore* gpuGBVISoftcore )
{
cudaError_t status;
gbviSim.pSwitchDerivative = gpuGBVISoftcore->getGpuSwitchDerivative();
gbviSim.quinticLowerLimitFactor = gpuGBVISoftcore->getQuinticLowerLimitFactor();
gbviSim.quinticUpperLimit = gpuGBVISoftcore->getQuinticUpperLimit();
status = cudaMemcpyToSymbol(gbviSimDev, &gbviSim, sizeof(cudaFreeEnergySimulationGBVI));
RTERROR(status, "cudaMemcpyToSymbol: SetCalculateGBVISoftcoreSupplementarySim");
(void) fprintf( stderr, "SetCalculateGBVISoftcoreSupplementarySim %14.6e %14.6e swDerv=%p\n",
gbviSim.quinticLowerLimitFactor, gbviSim.quinticUpperLimit, gbviSim.pSwitchDerivative );
}
// create, initialize and enter BornRadiusScaleFactors values (used to scale contribution of atoms to Born sum of other atoms)
// return handle to GpuGBVISoftcore object
extern "C"
GpuGBVISoftcore* gpuSetGBVISoftcoreParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<int>& atom,
const std::vector<float>& radius, const std::vector<float>& gamma,
const std::vector<float>& scaledRadii, const std::vector<float>& bornRadiusScaleFactors,
const std::vector<float>& quinticSplineParameters)
{
static const float electricConstant = -166.02691f;
unsigned int atoms = atom.size();
double tau = ((1.0f/innerDielectric)-(1.0f/solventDielectric));
// create gpuGBVISoftcore, load parameters, and track minimum softcore value
// gpuGBVISoftcore is not really being used (it was in the initial implementation) --
// will be removed in future once confirmed not needed
GpuGBVISoftcore* gpuGBVISoftcore = new GpuGBVISoftcore();
unsigned int numberOfParticles = radius.size();
// check if quintic scaling to be applied
if( quinticSplineParameters.size() == 2 ){
gpuGBVISoftcore->setBornRadiiScalingMethod( 1 );
gpuGBVISoftcore->setQuinticLowerLimitFactor( quinticSplineParameters[0] );
gpuGBVISoftcore->setQuinticUpperLimit( quinticSplineParameters[1] );
gpuGBVISoftcore->initializeGpuSwitchDerivative( gpu->sim.paddedNumberOfAtoms );
}
for (unsigned int i = 0; i < bornRadiusScaleFactors.size(); i++)
{
(*gpu->psGBVIData)[i].x = radius[i];
(*gpu->psGBVIData)[i].y = scaledRadii[i];
(*gpu->psGBVIData)[i].z = tau*gamma[i];
(*gpu->psGBVIData)[i].w = bornRadiusScaleFactors[i];
(*gpu->psObcData)[i].x = radius[i];
(*gpu->psObcData)[i].y = 0.9f*radius[i];
}
// Dummy out extra atom data
for (unsigned int i = atoms; i < gpu->sim.paddedNumberOfAtoms; i++)
{
(*gpu->psBornRadii)[i] = 0.2f;
(*gpu->psGBVIData)[i].x = 0.01f;
(*gpu->psGBVIData)[i].y = 0.01f;
(*gpu->psGBVIData)[i].z = 0.01f;
(*gpu->psGBVIData)[i].w = 1.00f;
}
#undef DUMP_PARAMETERS
#define DUMP_PARAMETERS 1
#if (DUMP_PARAMETERS == 1)
(void) fprintf( stderr,"GBVI softcore param %u %u sclMeth=%d LwFct=%8.3f UpLmt=[%12.5e (nm) %12.5e]\nR scaledR gamma*tau= bornRadiusScaleFactor \n",
bornRadiusScaleFactors.size(), gpu->sim.paddedNumberOfAtoms,
gpuGBVISoftcore->getBornRadiiScalingMethod(), gpuGBVISoftcore->getQuinticLowerLimitFactor(),
powf( gpuGBVISoftcore->getQuinticUpperLimit(), -0.3333333f ), gpuGBVISoftcore->getQuinticUpperLimit() );
int maxPrint = 31;
for (unsigned int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++)
{
(void) fprintf( stderr,"%6u %14.7e %14.7e %14.7e %14.7e\n",
ii, (*gpu->psGBVIData)[ii].x, (*gpu->psGBVIData)[ii].y, (*gpu->psGBVIData)[ii].z, (*gpu->psGBVIData)[ii].w );
if( ii == maxPrint ){
ii = gpu->sim.paddedNumberOfAtoms - maxPrint;
if( ii < maxPrint )ii = maxPrint;
}
}
#endif
gpu->psBornRadii->Upload();
gpu->psGBVIData->Upload();
gpu->psObcData->Upload();
gpu->sim.preFactor = 2.0f*electricConstant*((1.0f/innerDielectric)-(1.0f/solventDielectric))*gpu->sim.forceConversionFactor;
gpuGBVISoftcore->upload( gpu );
#if (DUMP_PARAMETERS == 1)
(void) fprintf( stderr, "gpuSetGBVISoftcoreParameters: preFactor=%14.6e elecCnstnt=%.4f frcCnvrsnFctr=%.4f tau=%.4f.\n",
gpu->sim.preFactor, 2.0f*electricConstant, gpu->sim.forceConversionFactor, ((1.0f/innerDielectric)-(1.0f/solventDielectric)) );
#endif
return gpuGBVISoftcore;
}
// delete gpuGBVISoftcore
extern "C"
void gpuDeleteGBVISoftcoreParameters( void* gpuGBVISoftcore)
{
delete gpuGBVISoftcore;
}
struct Atom {
float x;
float y;
float z;
float r;
float sr;
float sum;
float gamma;
float bornRadiusScaleFactor;
};
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateGBVISoftcoreBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateGBVISoftcoreBornSum.h"
// Include versions of the kernels with cutoffs.
#if 0
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateGBVISoftcoreBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateGBVISoftcoreBornSum.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateGBVISoftcoreBornSum.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVISoftcoreBornSum.h"
#endif
#if 0
__global__ void kClearGBVISoftcoreBornSum_kernel()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
while (pos < cSim.stride * cSim.nonbondOutputBuffers)
{
((float*)cSim.pBornSum)[pos] = 0.0f;
pos += gridDim.x * blockDim.x;
}
}
#endif
__device__ void quinticSpline( float x, float rl, float ru, float* outValue, float* outDerivative )
{
float numerator = x - rl;
float denominator = ru - rl;
float ratio = numerator/denominator;
float ratio2 = ratio*ratio;
float ratio3 = ratio2*ratio;
*outValue = 1.0f + ratio3*(-10.f + 15.0f*ratio - 6.0f*ratio2);
*outDerivative = ratio2*(-30.0f + 60.0f*ratio - 30.0f*ratio2)/denominator;
}
__global__ void kReduceGBVIBornSumQuinticScaling_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
while (pos < cSim.atoms)
{
float sum = 0.0f;
float* pSt = cSim.pBornSum + pos;
float4 atom = cSim.pGBVIData[pos];
// Get summed Born data
for (int i = 0; i < cSim.nonbondOutputBuffers; i++)
{
sum += *pSt;
// printf("%4d %4d A: %9.4f\n", pos, i, *pSt);
pSt += cSim.stride;
}
// Now calculate Born radius
float Rinv = 1.0f/atom.x;
float r3 = Rinv*Rinv*Rinv;
float splineL = gbviSimDev.quinticLowerLimitFactor*r3;
//float bSum = sum;
float switchDeriviative;
if( sum > splineL ){
if( sum < r3 ){
float splineValue, splineDerivative;
quinticSpline( sum, splineL, r3, &splineValue, &splineDerivative );
switchDeriviative = splineValue - (r3 - sum)*splineDerivative;
sum = (r3 - sum)*splineValue + gbviSimDev.quinticUpperLimit;
} else {
sum = gbviSimDev.quinticUpperLimit;
switchDeriviative = 0.0f;
}
} else {
sum = r3 - sum;
switchDeriviative = 1.0f;
}
cSim.pBornRadii[pos] = pow( sum, (-1.0f/3.0f) );
//cSim.pBornSum[pos] = bSum;
gbviSimDev.pSwitchDerivative[pos] = switchDeriviative;
pos += gridDim.x * blockDim.x;
}
}
void kReduceGBVIBornSumQuinticScaling(gpuContext gpu, GpuGBVISoftcore* gpuGBVISoftcore)
{
//printf("kReduceGBVIBornSumQuinticScaling_kernel\n");
kReduceGBVIBornSumQuinticScaling_kernel<<<gpu->sim.blocks, 384>>>();
gpu->bRecalculateBornRadii = false;
LAUNCHERROR("kReduceGBVIBornSumQuinticScaling_kernel");
#define GBVI_DEBUG 0
#if ( GBVI_DEBUG == 1 )
gpu->psGBVIData->Download();
gpu->psBornSum->Download();
gpu->psBornRadii->Download();
gpu->psPosq4->Download();
CUDAStream<float>* psSwitchDerivative = gpuGBVISoftcore->getSwitchDerivative();
psSwitchDerivative->Download();
(void) fprintf( stderr, "\nkReduceGBVIBornSumQuinticScaling: Post BornSum %s Born radii & params\n",
(gpu->bIncludeGBVI ? "GBVI" : "Obc") );
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( stderr, "%6d bSum=%14.6e bR=%14.6e swDerv=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f] %s\n",
ii,
gpu->psBornSum->_pSysStream[0][ii],
gpu->psBornRadii->_pSysStream[0][ii],
psSwitchDerivative->_pSysStream[0][ii],
gpu->psGBVIData->_pSysStream[0][ii].x,
gpu->psGBVIData->_pSysStream[0][ii].y,
gpu->psGBVIData->_pSysStream[0][ii].z,
gpu->psPosq4->_pSysStream[0][ii].x, gpu->psPosq4->_pSysStream[0][ii].y,
gpu->psPosq4->_pSysStream[0][ii].z, gpu->psPosq4->_pSysStream[0][ii].w,
(fabs( psSwitchDerivative->_pSysStream[0][ii] - 1.0 ) > 1.0e-05 ? "SWWWWW" : "")
);
}
#endif
#undef GBVI_DEBUG
}
__global__ void kReduceGBVIBornForcesQuinticScaling_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
float energy = 0.0f;
while (pos < cSim.atoms)
{
float bornRadius = cSim.pBornRadii[pos];
float4 gbviData = cSim.pGBVIData[pos];
float switchDeriv = gbviSimDev.pSwitchDerivative[pos];
float totalForce = 0.0f;
float* pFt = cSim.pBornForce + pos;
int i = cSim.nonbondOutputBuffers;
while (i >= 4)
{
float f1 = *pFt;
pFt += cSim.stride;
float f2 = *pFt;
pFt += cSim.stride;
float f3 = *pFt;
pFt += cSim.stride;
float f4 = *pFt;
pFt += cSim.stride;
totalForce += f1 + f2 + f3 + f4;
i -= 4;
}
if (i >= 2)
{
float f1 = *pFt;
pFt += cSim.stride;
float f2 = *pFt;
pFt += cSim.stride;
totalForce += f1 + f2;
i -= 2;
}
if (i > 0)
{
totalForce += *pFt;
}
float ratio = (gbviData.x/bornRadius);
float ratio3 = ratio*ratio*ratio;
energy -= gbviData.z*ratio3;
totalForce += (3.0f*gbviData.z*ratio3)/bornRadius; // 'cavity' term
float br2 = bornRadius*bornRadius;
totalForce *= (1.0f/3.0f)*br2*br2*switchDeriv;
pFt = cSim.pBornForce + pos;
*pFt = totalForce;
pos += gridDim.x * blockDim.x;
}
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += energy;
}
void kReduceGBVIBornForcesQuinticScaling(gpuContext gpu)
{
//printf("kReduceObcGbsaBornForces\n");
kReduceGBVIBornForcesQuinticScaling_kernel<<<gpu->sim.blocks, gpu->sim.bf_reduce_threads_per_block>>>();
LAUNCHERROR("kReduceGBVIBornForcesQuinticScaling");
}
void kCalculateGBVISoftcoreBornSum(gpuContext gpu)
{
//printf("kCalculateGBVIBornSum\n");
kClearGBVIBornSum( gpu );
LAUNCHERROR("kClearGBVIBornSum from kCalculateGBVISoftcoreBornSum");
size_t numWithInteractions;
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
#define GBVI 0
#if GBVI == 1
int maxPrint = 31;
gpu->psWorkUnit->Download();
fprintf( stderr, "kCalculateGBVISoftcoreBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u\n", gpu->bOutputBufferPerWarp,
gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.workUnits, gpu->psWorkUnit->_pSysStream[0][0],
sizeof(Atom)*gpu->sim.nonbond_threads_per_block );
gpu->psGBVIData->Download();
gpu->psBornSum->Download();
gpu->psPosq4->Download();
(void) fprintf( stderr, "\nkCalculateGBVISoftcoreBornSum: pre BornSum %s Born radii & params\n",
(gpu->bIncludeGBVI ? "GBVI" : "Obc") );
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( stderr, "%6d bSum=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f]\n",
ii,
gpu->psBornSum->_pSysStream[0][ii],
gpu->psGBVIData->_pSysStream[0][ii].x,
gpu->psGBVIData->_pSysStream[0][ii].y,
gpu->psGBVIData->_pSysStream[0][ii].z,
gpu->psPosq4->_pSysStream[0][ii].x, gpu->psPosq4->_pSysStream[0][ii].y,
gpu->psPosq4->_pSysStream[0][ii].z, gpu->psPosq4->_pSysStream[0][ii].w
);
if( (ii == maxPrint) && ( ii < (gpu->natoms - maxPrint)) ){
ii = gpu->natoms - maxPrint;
}
}
#endif
#undef GBVI
if (gpu->bOutputBufferPerWarp){
kCalculateGBVISoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
} else {
kCalculateGBVISoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
}
break;
#if 0
case CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateGBVICutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateGBVICutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
break;
case PERIODIC:
if (gpu->bOutputBufferPerWarp)
kCalculateGBVIPeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
else
kCalculateGBVIPeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
break;
#endif
}
LAUNCHERROR("kCalculateGBVIBornSum");
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment