/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include
#include
#include
#include
#include
#include
#include
using namespace std;
#include "gputypes.h"
#include "cudatypes.h"
#define UNROLLXX 0
#define UNROLLXY 0
struct Atom {
float x;
float y;
float z;
float4 params;
float fx;
float fy;
float fz;
};
static __constant__ cudaGmxSimulation cSim;
static __constant__ Expression<128> forceExp;
static __constant__ Expression<128> energyExp;
static __constant__ Expression<64> combiningRules[4];
void SetCalculateCustomNonbondedForcesSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}
void GetCalculateCustomNonbondedForcesSim(gpuContext gpu)
{
cudaError_t status;
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
void SetCustomNonbondedForceExpression(const Expression<128>& expression)
{
cudaError_t status;
status = cudaMemcpyToSymbol(forceExp, &expression, sizeof(forceExp));
RTERROR(status, "SetCustomNonbondedForceExpression: cudaMemcpyToSymbol failed");
}
void SetCustomNonbondedEnergyExpression(const Expression<128>& expression)
{
cudaError_t status;
status = cudaMemcpyToSymbol(energyExp, &expression, sizeof(energyExp));
RTERROR(status, "SetCustomNonbondedEnergyExpression: cudaMemcpyToSymbol failed");
}
void SetCustomNonbondedCombiningRules(const Expression<64>* expressions)
{
cudaError_t status;
status = cudaMemcpyToSymbol(combiningRules, expressions, sizeof(combiningRules));
RTERROR(status, "SetCustomNonbondedCombiningRules: cudaMemcpyToSymbol failed");
}
template
__device__ float kEvaluateExpression_kernel(Expression* expression, float* stack, float var0, float4 vars1, float4 vars2)
{
int stackPointer = -1;
for (int i = 0; i < expression->length; i++)
{
switch (expression->op[i])
{
case CONSTANT:
stack[++stackPointer] = expression->arg[i];
break;
case VARIABLE0:
stack[++stackPointer] = var0;
break;
case VARIABLE1:
stack[++stackPointer] = vars1.x;
break;
// case VARIABLE2:
// stack[++stackPointer] = vars1.y;
// break;
// case VARIABLE3:
// stack[++stackPointer] = vars1.z;
// break;
// case VARIABLE4:
// stack[++stackPointer] = vars1.w;
// break;
case VARIABLE5:
stack[++stackPointer] = vars2.x;
break;
// case VARIABLE6:
// stack[++stackPointer] = vars2.y;
// break;
// case VARIABLE7:
// stack[++stackPointer] = vars2.z;
// break;
// case VARIABLE8:
// stack[++stackPointer] = vars2.w;
// break;
case ADD:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp+stack[--stackPointer];
break;
}
case SUBTRACT:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp-stack[--stackPointer];
break;
}
case MULTIPLY:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp*stack[--stackPointer];
break;
}
case DIVIDE:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp/stack[--stackPointer];
break;
}
case POWER:
{
float temp = stack[stackPointer];
stack[stackPointer] = pow(temp, stack[--stackPointer]);
break;
}
case NEGATE:
stack[stackPointer] = -stack[stackPointer];
break;
case SQRT:
stack[stackPointer] = sqrt(stack[stackPointer]);
break;
case EXP:
stack[stackPointer] = exp(stack[stackPointer]);
break;
case LOG:
stack[stackPointer] = log(stack[stackPointer]);
break;
case SIN:
stack[stackPointer] = sin(stack[stackPointer]);
break;
case COS:
stack[stackPointer] = cos(stack[stackPointer]);
break;
case SEC:
stack[stackPointer] = 1.0f/cos(stack[stackPointer]);
break;
case CSC:
stack[stackPointer] = 1.0f/sin(stack[stackPointer]);
break;
case TAN:
stack[stackPointer] = tan(stack[stackPointer]);
break;
case COT:
stack[stackPointer] = 1.0f/tan(stack[stackPointer]);
break;
case ASIN:
stack[stackPointer] = asin(stack[stackPointer]);
break;
case ACOS:
stack[stackPointer] = acos(stack[stackPointer]);
break;
case ATAN:
stack[stackPointer] = atan(stack[stackPointer]);
break;
case SQUARE:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp*temp;
break;
}
case CUBE:
{
float temp = stack[stackPointer];
stack[stackPointer] = temp*temp*temp;
break;
}
case RECIPROCAL:
stack[stackPointer] = 1.0f/stack[stackPointer];
break;
case INCREMENT:
stack[stackPointer] = stack[stackPointer]+1.0f;
break;
case DECREMENT:
stack[stackPointer] = stack[stackPointer]-1.0f;
break;
}
}
return stack[stackPointer];
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateCustomNonbondedForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateCustomNonbondedForces.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateCustomNonbondedForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateCustomNonbondedForces.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateCustomNonbondedForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCustomNonbondedForces.h"
__global__ void kFindBlockBoundsCutoff_kernel();
__global__ void kFindBlocksWithInteractionsCutoff_kernel();
__global__ void kFindInteractionsWithinBlocksCutoff_kernel(unsigned int* workUnit);
__global__ void kFindBlockBoundsPeriodic_kernel();
__global__ void kFindBlocksWithInteractionsPeriodic_kernel();
__global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int* workUnit);
void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
{
// printf("kCalculateCustomNonbondedCutoffForces\n");
CUDPPResult result;
switch (gpu->sim.customNonbondedMethod)
{
case NO_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedN2ByWarpForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
else
kCalculateCustomNonbondedN2Forces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedN2Forces");
break;
case CUTOFF:
if (!neighborListValid)
{
kFindBlockBoundsCutoff_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsCutoff");
kFindBlocksWithInteractionsCutoff_kernel<<sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsCutoff");
result = cudppCompact(gpu->cudpp, gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractionCount,
gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits);
if (result != CUDPP_SUCCESS)
{
printf("Error in cudppCompact: %d\n", result);
exit(-1);
}
kFindInteractionsWithinBlocksCutoff_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedCutoffByWarpForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCustomNonbondedCutoffForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedCutoffForces");
break;
case PERIODIC:
if (!neighborListValid)
{
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
result = cudppCompact(gpu->cudpp, gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractionCount,
gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits);
if (result != CUDPP_SUCCESS)
{
printf("Error in cudppCompact: %d\n", result);
exit(-1);
}
kFindInteractionsWithinBlocksPeriodic_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCustomNonbondedPeriodicByWarpForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCustomNonbondedPeriodicForces_kernel<<sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+MAX_STACK_SIZE*sizeof(float)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCustomNonbondedPeriodicForces");
break;
}
}