Commit 3862202e authored by Justin MacCallum
Browse files

Merge branch 'upstream' into fork

parents e1a4e015 73882ac5
......@@ -181,7 +181,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "ASIN(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::ACOS:
out << "ACSO(" << getTempName(node.getChildren()[0], temps) << ")";
out << "ACOS(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::ATAN:
out << "ATAN(" << getTempName(node.getChildren()[0], temps) << ")";
......
This diff is collapsed.
......@@ -457,7 +457,9 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
bool useShuffle = (context.getComputeCapability() >= 3.0);
int cudaVersion;
cuDriverGetVersion(&cudaVersion);
bool useShuffle = (context.getComputeCapability() >= 3.0 && cudaVersion >= 5050);
// Part 1. Defines for on diagonal exclusion tiles
stringstream loadLocal1;
......
......@@ -87,12 +87,14 @@ CudaPlatform::CudaPlatform() {
platformProperties.push_back(CudaDeviceName());
platformProperties.push_back(CudaUseBlockingSync());
platformProperties.push_back(CudaPrecision());
platformProperties.push_back(CudaUseCpuPme());
platformProperties.push_back(CudaCompiler());
platformProperties.push_back(CudaTempDirectory());
setPropertyDefaultValue(CudaDeviceIndex(), "");
setPropertyDefaultValue(CudaDeviceName(), "");
setPropertyDefaultValue(CudaUseBlockingSync(), "true");
setPropertyDefaultValue(CudaPrecision(), "single");
setPropertyDefaultValue(CudaUseCpuPme(), "false");
#ifdef _MSC_VER
char* bindir = getenv("CUDA_BIN_PATH");
string nvcc = (bindir == NULL ? "nvcc.exe" : string(bindir)+"\\nvcc.exe");
......@@ -141,13 +143,20 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue(CudaUseBlockingSync()) : properties.find(CudaUseBlockingSync())->second);
string precisionPropValue = (properties.find(CudaPrecision()) == properties.end() ?
getPropertyDefaultValue(CudaPrecision()) : properties.find(CudaPrecision())->second);
string cpuPmePropValue = (properties.find(CudaUseCpuPme()) == properties.end() ?
getPropertyDefaultValue(CudaUseCpuPme()) : properties.find(CudaUseCpuPme())->second);
const string& compilerPropValue = (properties.find(CudaCompiler()) == properties.end() ?
getPropertyDefaultValue(CudaCompiler()) : properties.find(CudaCompiler())->second);
const string& tempPropValue = (properties.find(CudaTempDirectory()) == properties.end() ?
getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
context.setPlatformData(new PlatformData(context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, compilerPropValue, tempPropValue));
transform(cpuPmePropValue.begin(), cpuPmePropValue.end(), cpuPmePropValue.begin(), ::tolower);
vector<string> pmeKernelName;
pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
if (!supportsKernels(pmeKernelName))
cpuPmePropValue = "false";
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue));
}
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
......@@ -155,8 +164,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
delete data;
}
CudaPlatform::PlatformData::PlatformData(const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& compilerProperty, const string& tempProperty) : removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) {
bool blocking = (blockingProperty == "true");
vector<string> devices;
size_t searchPos = 0, nextPos;
......@@ -185,10 +194,12 @@ CudaPlatform::PlatformData::PlatformData(const System& system, const string& dev
CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name");
deviceName << name;
}
useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
propertyValues[CudaPlatform::CudaDeviceIndex()] = deviceIndex.str();
propertyValues[CudaPlatform::CudaDeviceName()] = deviceName.str();
propertyValues[CudaPlatform::CudaUseBlockingSync()] = blocking ? "true" : "false";
propertyValues[CudaPlatform::CudaPrecision()] = precisionProperty;
propertyValues[CudaPlatform::CudaUseCpuPme()] = useCpuPme ? "true" : "false";
propertyValues[CudaPlatform::CudaCompiler()] = compilerProperty;
propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
contextEnergy.resize(contexts.size());
......
......@@ -52,10 +52,10 @@ extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4*
}
}
extern "C" __global__ void generateRandomNumbers(float4* __restrict__ random, uint4* __restrict__ seed) {
extern "C" __global__ void generateRandomNumbers(int numValues, float4* __restrict__ random, uint4* __restrict__ seed) {
uint4 state = seed[blockIdx.x*blockDim.x+threadIdx.x];
unsigned int carry = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numValues; index += blockDim.x*gridDim.x) {
// Generate three uniform random numbers.
state.x = state.x * 69069 + 1;
......
......@@ -34,11 +34,10 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
const mixed* __restrict__ params, mixed* __restrict__ sum, const float4* __restrict__ gaussianValues,
unsigned int randomIndex, const float4* __restrict__ uniformValues, const real* __restrict__ energy
unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real* __restrict__ energy
PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
randomIndex += index;
const double forceScale = 1.0/0xFFFFFFFF;
while (index < NUM_ATOMS) {
#ifdef LOAD_POS_AS_DELTA
......@@ -50,11 +49,10 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
double4 f = make_double4(forceScale*force[index], forceScale*force[index+PADDED_NUM_ATOMS], forceScale*force[index+PADDED_NUM_ATOMS*2], 0.0);
double mass = 1.0/velocity.w;
if (velocity.w != 0.0) {
float4 gaussian = gaussianValues[randomIndex];
float4 uniform = uniformValues[index];
int gaussianIndex = gaussianBaseIndex;
int uniformIndex = 0;
COMPUTE_STEP
}
randomIndex += blockDim.x*gridDim.x;
index += blockDim.x*gridDim.x;
}
}
......@@ -364,9 +364,9 @@ extern "C" __global__ void applySettleToPositions(int numClusters, mixed tol, co
mixed yaksYd = zaksZd*xaksXd - xaksZd*zaksXd;
mixed zaksYd = xaksZd*yaksXd - yaksZd*xaksXd;
mixed axlng = SQRT(xaksXd*xaksXd + yaksXd*yaksXd + zaksXd*zaksXd);
mixed aylng = SQRT(xaksYd*xaksYd + yaksYd*yaksYd + zaksYd*zaksYd);
mixed azlng = SQRT(xaksZd*xaksZd + yaksZd*yaksZd + zaksZd*zaksZd);
mixed axlng = sqrt(xaksXd*xaksXd + yaksXd*yaksXd + zaksXd*zaksXd);
mixed aylng = sqrt(xaksYd*xaksYd + yaksYd*yaksYd + zaksYd*zaksYd);
mixed azlng = sqrt(xaksZd*xaksZd + yaksZd*yaksZd + zaksZd*zaksZd);
mixed trns11 = xaksXd / axlng;
mixed trns21 = yaksXd / axlng;
mixed trns31 = zaksXd / axlng;
......@@ -392,13 +392,13 @@ extern "C" __global__ void applySettleToPositions(int numClusters, mixed tol, co
// --- Step2 A2' ---
float rc = 0.5f*params.y;
mixed rb = SQRT(params.x*params.x-rc*rc);
mixed rb = sqrt(params.x*params.x-rc*rc);
mixed ra = rb*(m1+m2)*invTotalMass;
rb -= ra;
mixed sinphi = za1d/ra;
mixed cosphi = SQRT(1-sinphi*sinphi);
mixed cosphi = sqrt(1-sinphi*sinphi);
mixed sinpsi = (zb1d-zc1d) / (2*rc*cosphi);
mixed cospsi = SQRT(1-sinpsi*sinpsi);
mixed cospsi = sqrt(1-sinpsi*sinpsi);
mixed ya2d = ra*cosphi;
mixed xb2d = - rc*cospsi;
......@@ -406,7 +406,7 @@ extern "C" __global__ void applySettleToPositions(int numClusters, mixed tol, co
mixed yc2d = - rb*cosphi + rc*sinpsi*sinphi;
mixed xb2d2 = xb2d*xb2d;
mixed hh2 = 4.0f*xb2d2 + (yb2d-yc2d)*(yb2d-yc2d) + (zb1d-zc1d)*(zb1d-zc1d);
mixed deltx = 2.0f*xb2d + SQRT(4.0f*xb2d2 - hh2 + params.y*params.y);
mixed deltx = 2.0f*xb2d + sqrt(4.0f*xb2d2 - hh2 + params.y*params.y);
xb2d -= deltx*0.5f;
// --- Step3 al,be,ga ---
......@@ -416,11 +416,11 @@ extern "C" __global__ void applySettleToPositions(int numClusters, mixed tol, co
mixed gamma = xb0d*yb1d - xb1d*yb0d + xc0d*yc1d - xc1d*yc0d;
mixed al2be2 = alpha*alpha + beta*beta;
mixed sintheta = (alpha*gamma - beta*SQRT(al2be2 - gamma*gamma)) / al2be2;
mixed sintheta = (alpha*gamma - beta*sqrt(al2be2 - gamma*gamma)) / al2be2;
// --- Step4 A3' ---
mixed costheta = SQRT(1-sintheta*sintheta);
mixed costheta = sqrt(1-sintheta*sintheta);
mixed xa3d = - ya2d*sintheta;
mixed ya3d = ya2d*costheta;
mixed za3d = za1d;
......
......@@ -266,8 +266,18 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
real q = pos.w*EPSILON_FACTOR;
forceBuffers[atom] += static_cast<unsigned long long>((long long) (-q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x*0x100000000));
forceBuffers[atom+PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (-q*force.y*GRID_SIZE_Y*invPeriodicBoxSize.y*0x100000000));
forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (-q*force.z*GRID_SIZE_Z*invPeriodicBoxSize.z*0x100000000));
forceBuffers[atom] += static_cast<unsigned long long>((long long) (-q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x*0x100000000));
forceBuffers[atom+PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (-q*force.y*GRID_SIZE_Y*invPeriodicBoxSize.y*0x100000000));
forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (-q*force.z*GRID_SIZE_Z*invPeriodicBoxSize.z*0x100000000));
}
}
/**
 * Add a set of per-atom forces into the 64-bit fixed-point force buffers.
 *
 * Threads walk the atoms in a grid-stride loop: each starts at its global thread
 * index and advances by the total number of launched threads until NUM_ATOMS is
 * reached, so any launch configuration covers every atom.
 *
 * For each atom, the x, y and z components of forces[atom] are multiplied by
 * 0x100000000 (2^32), truncated to a signed 64-bit integer, and added to
 * forceBuffers at offsets atom, atom+PADDED_NUM_ATOMS and atom+2*PADDED_NUM_ATOMS.
 * The w component of the input is not read.
 *
 * @param forces        the forces to add, one real4 per atom
 * @param forceBuffers  the fixed-point accumulation buffers, updated in place
 */
extern "C" __global__
void addForces(const real4* __restrict__ forces, unsigned long long* __restrict__ forceBuffers) {
    // Total number of threads in the launch; this is the step between the atoms a thread handles.
    const unsigned int stride = blockDim.x*gridDim.x;
    int atom = blockIdx.x*blockDim.x+threadIdx.x;
    while (atom < NUM_ATOMS) {
        const real4 f = forces[atom];

        // Convert through a signed integer first so negative components keep their
        // two's-complement representation when reinterpreted as unsigned.
        const long long fixedX = (long long) (f.x*0x100000000);
        const long long fixedY = (long long) (f.y*0x100000000);
        const long long fixedZ = (long long) (f.z*0x100000000);

        forceBuffers[atom] += static_cast<unsigned long long>(fixedX);
        forceBuffers[atom+PADDED_NUM_ATOMS] += static_cast<unsigned long long>(fixedY);
        forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>(fixedZ);

        atom += stride;
    }
}
......@@ -40,7 +40,7 @@
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
......
......@@ -43,7 +43,7 @@
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/BrownianIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
......
......@@ -40,7 +40,7 @@
#include "openmm/PeriodicTorsionForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
......
......@@ -42,7 +42,7 @@
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
......
......@@ -39,7 +39,7 @@
#include "openmm/CustomBondForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include <iostream>
#include <vector>
......
......@@ -39,7 +39,7 @@
#include "openmm/CustomExternalForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
......
......@@ -41,7 +41,7 @@
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/CustomIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
......@@ -651,6 +651,72 @@ void testRespa() {
}
}
/**
* Make sure random numbers are computed correctly when steps get merged.
*/
void testMergedRandoms() {
const int numParticles = 10;
const int numSteps = 10;
System system;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomIntegrator integrator(0.1);
integrator.addPerDofVariable("dofUniform1", 0);
integrator.addPerDofVariable("dofUniform2", 0);
integrator.addPerDofVariable("dofGaussian1", 0);
integrator.addPerDofVariable("dofGaussian2", 0);
integrator.addGlobalVariable("globalUniform1", 0);
integrator.addGlobalVariable("globalUniform2", 0);
integrator.addGlobalVariable("globalGaussian1", 0);
integrator.addGlobalVariable("globalGaussian2", 0);
integrator.addComputePerDof("dofUniform1", "uniform");
integrator.addComputePerDof("dofUniform2", "uniform");
integrator.addComputePerDof("dofGaussian1", "gaussian");
integrator.addComputePerDof("dofGaussian2", "gaussian");
integrator.addComputeGlobal("globalUniform1", "uniform");
integrator.addComputeGlobal("globalUniform2", "uniform");
integrator.addComputeGlobal("globalGaussian1", "gaussian");
integrator.addComputeGlobal("globalGaussian2", "gaussian");
Context context(system, integrator, platform);
// See if the random numbers are computed correctly.
vector<Vec3> values1, values2;
for (int i = 0; i < numSteps; i++) {
integrator.step(1);
integrator.getPerDofVariable(0, values1);
integrator.getPerDofVariable(1, values2);
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
double v1 = values1[i][j];
double v2 = values2[i][j];
ASSERT(v1 >= 0 && v1 < 1);
ASSERT(v2 >= 0 && v2 < 1);
ASSERT(v1 != v2);
}
integrator.getPerDofVariable(2, values1);
integrator.getPerDofVariable(3, values2);
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
double v1 = values1[i][j];
double v2 = values2[i][j];
ASSERT(v1 >= -10 && v1 < 10);
ASSERT(v2 >= -10 && v2 < 10);
ASSERT(v1 != v2);
}
double v1 = integrator.getGlobalVariable(0);
double v2 = integrator.getGlobalVariable(1);
ASSERT(v1 >= 0 && v1 < 1);
ASSERT(v2 >= 0 && v2 < 1);
ASSERT(v1 != v2);
v1 = integrator.getGlobalVariable(2);
v2 = integrator.getGlobalVariable(3);
ASSERT(v1 >= -10 && v1 < 10);
ASSERT(v2 >= -10 && v2 < 10);
ASSERT(v1 != v2);
}
}
int main(int argc, char* argv[]) {
try {
if (argc > 1)
......@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) {
testPerDofVariables();
testForceGroups();
testRespa();
testMergedRandoms();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -42,7 +42,7 @@
#include "openmm/LangevinIntegrator.h"
#include "openmm/VerletIntegrator.h"
#include "openmm/internal/ContextImpl.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
......
......@@ -40,7 +40,7 @@
#include "openmm/GBSAOBCForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include "openmm/NonbondedForce.h"
#include <iostream>
......
......@@ -40,7 +40,7 @@
#include "openmm/HarmonicAngleForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include <iostream>
#include <vector>
......
......@@ -40,7 +40,7 @@
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
......
......@@ -42,7 +42,7 @@
#include "openmm/LangevinIntegrator.h"
#include "openmm/VerletIntegrator.h"
#include "sfmt/SFMT.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include <iostream>
#include <vector>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment