Commit 8e725a70 authored by Lee-Ping's avatar Lee-Ping
Browse files

Merge branch 'master' of https://github.com/SimTk/openmm

parents 4334eca9 0e5073cf
...@@ -4752,22 +4752,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -4752,22 +4752,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines["SUM_BUFFER_SIZE"] = "0"; defines["SUM_BUFFER_SIZE"] = "0";
defines["SUM_OUTPUT_INDEX"] = "0"; defines["SUM_OUTPUT_INDEX"] = "0";
// Initialize the random number generator.
uniformRandoms = CudaArray::create<float4>(cu, cu.getNumAtoms(), "uniformRandoms");
randomSeed = CudaArray::create<int4>(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed");
vector<int4> seed(randomSeed->getSize());
unsigned int r = integrator.getRandomNumberSeed()+1;
for (int i = 0; i < randomSeed->getSize(); i++) {
seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
}
randomSeed->upload(seed);
CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");
// Build a list of all variables that affect the forces, so we can tell which // Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them. // steps invalidate them.
...@@ -4858,10 +4842,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -4858,10 +4842,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
for (int step = 1; step < numSteps; step++) { for (int step = 1; step < numSteps; step++) {
if (needsForces[step] || needsEnergy[step]) if (needsForces[step] || needsEnergy[step])
continue; continue;
if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal) if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal &&
!usesVariable(expression[step], "uniform") && !usesVariable(expression[step], "gaussian"))
merged[step] = true; merged[step] = true;
if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof && if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof)
!usesVariable(expression[step], "uniform"))
merged[step] = true; merged[step] = true;
} }
...@@ -4880,7 +4864,13 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -4880,7 +4864,13 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
} }
int numGaussian = 0, numUniform = 0; int numGaussian = 0, numUniform = 0;
for (int j = step; j < numSteps && (j == step || merged[j]); j++) { for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
numGaussian += numAtoms*usesVariable(expression[j], "gaussian");
numUniform += numAtoms*usesVariable(expression[j], "uniform");
compute << "{\n"; compute << "{\n";
if (numGaussian > 0)
compute << "float4 gaussian = gaussianValues[gaussianIndex+index];\n";
if (numUniform > 0)
compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]); compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]);
if (variable[j] == "x") { if (variable[j] == "x") {
...@@ -4899,9 +4889,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -4899,9 +4889,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+2] = perDofz"<<cu.intToString(i+1)<<";\n"; compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+2] = perDofz"<<cu.intToString(i+1)<<";\n";
} }
} }
if (numGaussian > 0)
compute << "gaussianIndex += NUM_ATOMS;\n";
if (numUniform > 0)
compute << "uniformIndex += NUM_ATOMS;\n";
compute << "}\n"; compute << "}\n";
numGaussian += numAtoms*usesVariable(expression[j], "gaussian");
numUniform += numAtoms*usesVariable(expression[j], "uniform");
} }
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_STEP"] = compute.str(); replacements["COMPUTE_STEP"] = compute.str();
...@@ -4931,9 +4923,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -4931,9 +4923,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1.push_back(&globalValues->getDevicePointer()); args1.push_back(&globalValues->getDevicePointer());
args1.push_back(&contextParameterValues->getDevicePointer()); args1.push_back(&contextParameterValues->getDevicePointer());
args1.push_back(&sumBuffer->getDevicePointer()); args1.push_back(&sumBuffer->getDevicePointer());
args1.push_back(&integration.getRandom().getDevicePointer());
args1.push_back(NULL); args1.push_back(NULL);
args1.push_back(&uniformRandoms->getDevicePointer()); args1.push_back(NULL);
args1.push_back(NULL);
args1.push_back(&potentialEnergy->getDevicePointer()); args1.push_back(&potentialEnergy->getDevicePointer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
args1.push_back(&perDofValues->getBuffers()[i].getMemory()); args1.push_back(&perDofValues->getBuffers()[i].getMemory());
...@@ -5000,6 +4992,25 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -5000,6 +4992,25 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
} }
} }
// Initialize the random number generator.
int maxUniformRandoms = 1;
for (int i = 0; i < (int) requiredUniform.size(); i++)
maxUniformRandoms = max(maxUniformRandoms, requiredUniform[i]);
uniformRandoms = CudaArray::create<float4>(cu, maxUniformRandoms, "uniformRandoms");
randomSeed = CudaArray::create<int4>(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed");
vector<int4> seed(randomSeed->getSize());
unsigned int r = integrator.getRandomNumberSeed()+1;
for (int i = 0; i < randomSeed->getSize(); i++) {
seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
}
randomSeed->upload(seed);
CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");
// Create the kernel for summing the potential energy. // Create the kernel for summing the potential energy.
defines["SUM_OUTPUT_INDEX"] = "0"; defines["SUM_OUTPUT_INDEX"] = "0";
...@@ -5042,7 +5053,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -5042,7 +5053,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs.push_back(&globalValues->getDevicePointer()); kineticEnergyArgs.push_back(&globalValues->getDevicePointer());
kineticEnergyArgs.push_back(&contextParameterValues->getDevicePointer()); kineticEnergyArgs.push_back(&contextParameterValues->getDevicePointer());
kineticEnergyArgs.push_back(&sumBuffer->getDevicePointer()); kineticEnergyArgs.push_back(&sumBuffer->getDevicePointer());
kineticEnergyArgs.push_back(&integration.getRandom().getDevicePointer()); kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(NULL); kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(&uniformRandoms->getDevicePointer()); kineticEnergyArgs.push_back(&uniformRandoms->getDevicePointer());
kineticEnergyArgs.push_back(&potentialEnergy->getDevicePointer()); kineticEnergyArgs.push_back(&potentialEnergy->getDevicePointer());
...@@ -5112,7 +5123,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -5112,7 +5123,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
// Loop over computation steps in the integrator and execute them. // Loop over computation steps in the integrator and execute them.
void* randomArgs[] = {&uniformRandoms->getDevicePointer(), &randomSeed->getDevicePointer()}; int maxUniformRandoms = uniformRandoms->getSize();
void* randomArgs[] = {&maxUniformRandoms, &uniformRandoms->getDevicePointer(), &randomSeed->getDevicePointer()};
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0); CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
for (int i = 0; i < numSteps; i++) { for (int i = 0; i < numSteps; i++) {
int lastForceGroups = context.getLastForceGroups(); int lastForceGroups = context.getLastForceGroups();
...@@ -5161,7 +5173,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -5161,7 +5173,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) { if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) {
int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]); int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]);
kernelArgs[i][0][1] = &posCorrection; kernelArgs[i][0][1] = &posCorrection;
kernelArgs[i][0][9] = &integration.getRandom().getDevicePointer();
kernelArgs[i][0][10] = &randomIndex; kernelArgs[i][0][10] = &randomIndex;
kernelArgs[i][0][11] = &uniformRandoms->getDevicePointer();
if (requiredUniform[i] > 0) if (requiredUniform[i] > 0)
cu.executeKernel(randomKernel, &randomArgs[0], numAtoms); cu.executeKernel(randomKernel, &randomArgs[0], numAtoms);
cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms); cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms);
...@@ -5176,7 +5190,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -5176,7 +5190,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
else if (stepType[i] == CustomIntegrator::ComputeSum) { else if (stepType[i] == CustomIntegrator::ComputeSum) {
int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]); int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]);
kernelArgs[i][0][1] = &posCorrection; kernelArgs[i][0][1] = &posCorrection;
kernelArgs[i][0][9] = &integration.getRandom().getDevicePointer();
kernelArgs[i][0][10] = &randomIndex; kernelArgs[i][0][10] = &randomIndex;
kernelArgs[i][0][11] = &uniformRandoms->getDevicePointer();
if (requiredUniform[i] > 0) if (requiredUniform[i] > 0)
cu.executeKernel(randomKernel, &randomArgs[0], numAtoms); cu.executeKernel(randomKernel, &randomArgs[0], numAtoms);
cu.clearBuffer(*sumBuffer); cu.clearBuffer(*sumBuffer);
...@@ -5227,6 +5243,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, ...@@ -5227,6 +5243,7 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0); CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
int randomIndex = 0; int randomIndex = 0;
kineticEnergyArgs[1] = &posCorrection; kineticEnergyArgs[1] = &posCorrection;
kineticEnergyArgs[9] = &cu.getIntegrationUtilities().getRandom().getDevicePointer();
kineticEnergyArgs[10] = &randomIndex; kineticEnergyArgs[10] = &randomIndex;
cu.clearBuffer(*sumBuffer); cu.clearBuffer(*sumBuffer);
cu.executeKernel(kineticEnergyKernel, &kineticEnergyArgs[0], cu.getNumAtoms()); cu.executeKernel(kineticEnergyKernel, &kineticEnergyArgs[0], cu.getNumAtoms());
......
...@@ -457,7 +457,9 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -457,7 +457,9 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
} }
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str(); replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
bool useShuffle = (context.getComputeCapability() >= 3.0); int cudaVersion;
cuDriverGetVersion(&cudaVersion);
bool useShuffle = (context.getComputeCapability() >= 3.0 && cudaVersion >= 5050);
// Part 1. Defines for on diagonal exclusion tiles // Part 1. Defines for on diagonal exclusion tiles
stringstream loadLocal1; stringstream loadLocal1;
......
...@@ -52,10 +52,10 @@ extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4* ...@@ -52,10 +52,10 @@ extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4*
} }
} }
extern "C" __global__ void generateRandomNumbers(float4* __restrict__ random, uint4* __restrict__ seed) { extern "C" __global__ void generateRandomNumbers(int numValues, float4* __restrict__ random, uint4* __restrict__ seed) {
uint4 state = seed[blockIdx.x*blockDim.x+threadIdx.x]; uint4 state = seed[blockIdx.x*blockDim.x+threadIdx.x];
unsigned int carry = 0; unsigned int carry = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numValues; index += blockDim.x*gridDim.x) {
// Generate three uniform random numbers. // Generate three uniform random numbers.
state.x = state.x * 69069 + 1; state.x = state.x * 69069 + 1;
......
...@@ -34,11 +34,10 @@ inline __device__ mixed4 convertFromDouble4(double4 a) { ...@@ -34,11 +34,10 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta, extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals, mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
const mixed* __restrict__ params, mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, const mixed* __restrict__ params, mixed* __restrict__ sum, const float4* __restrict__ gaussianValues,
unsigned int randomIndex, const float4* __restrict__ uniformValues, const real* __restrict__ energy unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real* __restrict__ energy
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y; mixed stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = blockIdx.x*blockDim.x+threadIdx.x;
randomIndex += index;
const double forceScale = 1.0/0xFFFFFFFF; const double forceScale = 1.0/0xFFFFFFFF;
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
#ifdef LOAD_POS_AS_DELTA #ifdef LOAD_POS_AS_DELTA
...@@ -50,11 +49,10 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest ...@@ -50,11 +49,10 @@ extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __rest
double4 f = make_double4(forceScale*force[index], forceScale*force[index+PADDED_NUM_ATOMS], forceScale*force[index+PADDED_NUM_ATOMS*2], 0.0); double4 f = make_double4(forceScale*force[index], forceScale*force[index+PADDED_NUM_ATOMS], forceScale*force[index+PADDED_NUM_ATOMS*2], 0.0);
double mass = 1.0/velocity.w; double mass = 1.0/velocity.w;
if (velocity.w != 0.0) { if (velocity.w != 0.0) {
float4 gaussian = gaussianValues[randomIndex]; int gaussianIndex = gaussianBaseIndex;
float4 uniform = uniformValues[index]; int uniformIndex = 0;
COMPUTE_STEP COMPUTE_STEP
} }
randomIndex += blockDim.x*gridDim.x;
index += blockDim.x*gridDim.x; index += blockDim.x*gridDim.x;
} }
} }
...@@ -651,6 +651,72 @@ void testRespa() { ...@@ -651,6 +651,72 @@ void testRespa() {
} }
} }
/**
* Make sure random numbers are computed correctly when steps get merged.
*/
void testMergedRandoms() {
const int numParticles = 10;
const int numSteps = 10;
System system;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomIntegrator integrator(0.1);
integrator.addPerDofVariable("dofUniform1", 0);
integrator.addPerDofVariable("dofUniform2", 0);
integrator.addPerDofVariable("dofGaussian1", 0);
integrator.addPerDofVariable("dofGaussian2", 0);
integrator.addGlobalVariable("globalUniform1", 0);
integrator.addGlobalVariable("globalUniform2", 0);
integrator.addGlobalVariable("globalGaussian1", 0);
integrator.addGlobalVariable("globalGaussian2", 0);
integrator.addComputePerDof("dofUniform1", "uniform");
integrator.addComputePerDof("dofUniform2", "uniform");
integrator.addComputePerDof("dofGaussian1", "gaussian");
integrator.addComputePerDof("dofGaussian2", "gaussian");
integrator.addComputeGlobal("globalUniform1", "uniform");
integrator.addComputeGlobal("globalUniform2", "uniform");
integrator.addComputeGlobal("globalGaussian1", "gaussian");
integrator.addComputeGlobal("globalGaussian2", "gaussian");
Context context(system, integrator, platform);
// See if the random numbers are computed correctly.
vector<Vec3> values1, values2;
for (int i = 0; i < numSteps; i++) {
integrator.step(1);
integrator.getPerDofVariable(0, values1);
integrator.getPerDofVariable(1, values2);
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
double v1 = values1[i][j];
double v2 = values2[i][j];
ASSERT(v1 >= 0 && v1 < 1);
ASSERT(v2 >= 0 && v2 < 1);
ASSERT(v1 != v2);
}
integrator.getPerDofVariable(2, values1);
integrator.getPerDofVariable(3, values2);
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
double v1 = values1[i][j];
double v2 = values2[i][j];
ASSERT(v1 >= -10 && v1 < 10);
ASSERT(v2 >= -10 && v2 < 10);
ASSERT(v1 != v2);
}
double v1 = integrator.getGlobalVariable(0);
double v2 = integrator.getGlobalVariable(1);
ASSERT(v1 >= 0 && v1 < 1);
ASSERT(v2 >= 0 && v2 < 1);
ASSERT(v1 != v2);
v1 = integrator.getGlobalVariable(2);
v2 = integrator.getGlobalVariable(3);
ASSERT(v1 >= -10 && v1 < 10);
ASSERT(v2 >= -10 && v2 < 10);
ASSERT(v1 != v2);
}
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
if (argc > 1) if (argc > 1)
...@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) { ...@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) {
testPerDofVariables(); testPerDofVariables();
testForceGroups(); testForceGroups();
testRespa(); testRespa();
testMergedRandoms();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -4980,24 +4980,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -4980,24 +4980,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["WORK_GROUP_SIZE"] = cl.intToString(OpenCLContext::ThreadBlockSize); defines["WORK_GROUP_SIZE"] = cl.intToString(OpenCLContext::ThreadBlockSize);
// Initialize the random number generator.
uniformRandoms = OpenCLArray::create<mm_float4>(cl, cl.getNumAtoms(), "uniformRandoms");
randomSeed = OpenCLArray::create<mm_int4>(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
vector<mm_int4> seed(randomSeed->getSize());
unsigned int r = integrator.getRandomNumberSeed()+1;
for (int i = 0; i < randomSeed->getSize(); i++) {
seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
}
randomSeed->upload(seed);
cl::Program randomProgram = cl.createProgram(OpenCLKernelSources::customIntegrator, defines);
randomKernel = cl::Kernel(randomProgram, "generateRandomNumbers");
randomKernel.setArg<cl::Buffer>(0, uniformRandoms->getDeviceBuffer());
randomKernel.setArg<cl::Buffer>(1, randomSeed->getDeviceBuffer());
// Build a list of all variables that affect the forces, so we can tell which // Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them. // steps invalidate them.
...@@ -5088,10 +5070,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -5088,10 +5070,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
for (int step = 1; step < numSteps; step++) { for (int step = 1; step < numSteps; step++) {
if (needsForces[step] || needsEnergy[step]) if (needsForces[step] || needsEnergy[step])
continue; continue;
if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal) if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal &&
!usesVariable(expression[step], "uniform") && !usesVariable(expression[step], "gaussian"))
merged[step] = true; merged[step] = true;
if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof && if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof)
!usesVariable(expression[step], "uniform"))
merged[step] = true; merged[step] = true;
} }
...@@ -5110,7 +5092,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -5110,7 +5092,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
} }
int numGaussian = 0, numUniform = 0; int numGaussian = 0, numUniform = 0;
for (int j = step; j < numSteps && (j == step || merged[j]); j++) { for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
numGaussian += numAtoms*usesVariable(expression[j], "gaussian");
numUniform += numAtoms*usesVariable(expression[j], "uniform");
compute << "{\n"; compute << "{\n";
if (numGaussian > 0)
compute << "float4 gaussian = gaussianValues[gaussianIndex+index];\n";
if (numUniform > 0)
compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]); compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]);
if (variable[j] == "x") { if (variable[j] == "x") {
...@@ -5133,9 +5121,11 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -5133,9 +5121,11 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
compute << "perDofValues"<<cl.intToString(i+1)<<"[3*index+2] = perDofz"<<cl.intToString(i+1)<<";\n"; compute << "perDofValues"<<cl.intToString(i+1)<<"[3*index+2] = perDofz"<<cl.intToString(i+1)<<";\n";
} }
} }
if (numGaussian > 0)
compute << "gaussianIndex += NUM_ATOMS;\n";
if (numUniform > 0)
compute << "uniformIndex += NUM_ATOMS;\n";
compute << "}\n"; compute << "}\n";
numGaussian += numAtoms*usesVariable(expression[j], "gaussian");
numUniform += numAtoms*usesVariable(expression[j], "uniform");
} }
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_STEP"] = compute.str(); replacements["COMPUTE_STEP"] = compute.str();
...@@ -5165,9 +5155,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -5165,9 +5155,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, integration.getRandom().getDeviceBuffer()); index += 3;
index++;
kernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory()); kernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
...@@ -5229,6 +5217,28 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -5229,6 +5217,28 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
} }
} }
// Initialize the random number generator.
int maxUniformRandoms = 1;
for (int i = 0; i < (int) requiredUniform.size(); i++)
maxUniformRandoms = max(maxUniformRandoms, requiredUniform[i]);
uniformRandoms = OpenCLArray::create<mm_float4>(cl, maxUniformRandoms, "uniformRandoms");
randomSeed = OpenCLArray::create<mm_int4>(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
vector<mm_int4> seed(randomSeed->getSize());
unsigned int r = integrator.getRandomNumberSeed()+1;
for (int i = 0; i < randomSeed->getSize(); i++) {
seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
}
randomSeed->upload(seed);
cl::Program randomProgram = cl.createProgram(OpenCLKernelSources::customIntegrator, defines);
randomKernel = cl::Kernel(randomProgram, "generateRandomNumbers");
randomKernel.setArg<cl_int>(0, maxUniformRandoms);
randomKernel.setArg<cl::Buffer>(1, uniformRandoms->getDeviceBuffer());
randomKernel.setArg<cl::Buffer>(2, randomSeed->getDeviceBuffer());
// Create the kernel for summing the potential energy. // Create the kernel for summing the potential energy.
cl::Program program = cl.createProgram(OpenCLKernelSources::customIntegrator, defines); cl::Program program = cl.createProgram(OpenCLKernelSources::customIntegrator, defines);
...@@ -5274,8 +5284,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -5274,8 +5284,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, integration.getRandom().getDeviceBuffer()); index += 2;
kineticEnergyKernel.setArg<cl_uint>(index++, 0);
kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
...@@ -5392,6 +5401,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -5392,6 +5401,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
} }
if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) { if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) {
kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i])); kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i]));
kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer());
kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer());
if (requiredUniform[i] > 0) if (requiredUniform[i] > 0)
cl.executeKernel(randomKernel, numAtoms); cl.executeKernel(randomKernel, numAtoms);
cl.executeKernel(kernels[i][0], numAtoms); cl.executeKernel(kernels[i][0], numAtoms);
...@@ -5403,6 +5414,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -5403,6 +5414,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
} }
else if (stepType[i] == CustomIntegrator::ComputeSum) { else if (stepType[i] == CustomIntegrator::ComputeSum) {
kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i])); kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i]));
kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer());
kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer());
if (requiredUniform[i] > 0) if (requiredUniform[i] > 0)
cl.executeKernel(randomKernel, numAtoms); cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(*sumBuffer); cl.clearBuffer(*sumBuffer);
...@@ -5454,6 +5467,8 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex ...@@ -5454,6 +5467,8 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
forcesAreValid = true; forcesAreValid = true;
} }
cl.clearBuffer(*sumBuffer); cl.clearBuffer(*sumBuffer);
kineticEnergyKernel.setArg<cl::Buffer>(9, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kineticEnergyKernel.setArg<cl_uint>(10, 0);
cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms()); cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms());
cl.executeKernel(sumKineticEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize); cl.executeKernel(sumKineticEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
......
...@@ -52,10 +52,10 @@ __kernel void applyPositionDeltas(__global real4* restrict posq, __global real4* ...@@ -52,10 +52,10 @@ __kernel void applyPositionDeltas(__global real4* restrict posq, __global real4*
} }
} }
__kernel void generateRandomNumbers(__global float4* restrict random, __global uint4* restrict seed) { __kernel void generateRandomNumbers(int numValues, __global float4* restrict random, __global uint4* restrict seed) {
uint4 state = seed[get_global_id(0)]; uint4 state = seed[get_global_id(0)];
unsigned int carry = 0; unsigned int carry = 0;
for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) { for (int index = get_global_id(0); index < numValues; index += get_global_size(0)) {
// Generate three uniform random numbers. // Generate three uniform random numbers.
state.x = state.x * 69069 + 1; state.x = state.x * 69069 + 1;
......
...@@ -26,11 +26,10 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec ...@@ -26,11 +26,10 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta, __kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta,
__global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals, __global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals,
__global const mixed* restrict params, __global mixed* restrict sum, __global const float4* restrict gaussianValues, __global const mixed* restrict params, __global mixed* restrict sum, __global const float4* restrict gaussianValues,
unsigned int randomIndex, __global const float4* restrict uniformValues, __global const real* restrict energy unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, __global const real* restrict energy
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y; mixed stepSize = dt[0].y;
int index = get_global_id(0); int index = get_global_id(0);
randomIndex += index;
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
#ifdef LOAD_POS_AS_DELTA #ifdef LOAD_POS_AS_DELTA
mixed4 position = loadPos(posq, posqCorrection, index)+posDelta[index]; mixed4 position = loadPos(posq, posqCorrection, index)+posDelta[index];
...@@ -41,11 +40,10 @@ __kernel void computePerDof(__global real4* restrict posq, __global real4* restr ...@@ -41,11 +40,10 @@ __kernel void computePerDof(__global real4* restrict posq, __global real4* restr
real4 f = force[index]; real4 f = force[index];
mixed mass = 1/velocity.w; mixed mass = 1/velocity.w;
if (velocity.w != 0.0) { if (velocity.w != 0.0) {
float4 gaussian = gaussianValues[randomIndex]; int gaussianIndex = gaussianBaseIndex;
float4 uniform = uniformValues[index]; int uniformIndex = 0;
COMPUTE_STEP COMPUTE_STEP
} }
randomIndex += get_global_size(0);
index += get_global_size(0); index += get_global_size(0);
} }
} }
...@@ -651,6 +651,72 @@ void testRespa() { ...@@ -651,6 +651,72 @@ void testRespa() {
} }
} }
/**
* Make sure random numbers are computed correctly when steps get merged.
*/
void testMergedRandoms() {
const int numParticles = 10;
const int numSteps = 10;
System system;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomIntegrator integrator(0.1);
integrator.addPerDofVariable("dofUniform1", 0);
integrator.addPerDofVariable("dofUniform2", 0);
integrator.addPerDofVariable("dofGaussian1", 0);
integrator.addPerDofVariable("dofGaussian2", 0);
integrator.addGlobalVariable("globalUniform1", 0);
integrator.addGlobalVariable("globalUniform2", 0);
integrator.addGlobalVariable("globalGaussian1", 0);
integrator.addGlobalVariable("globalGaussian2", 0);
integrator.addComputePerDof("dofUniform1", "uniform");
integrator.addComputePerDof("dofUniform2", "uniform");
integrator.addComputePerDof("dofGaussian1", "gaussian");
integrator.addComputePerDof("dofGaussian2", "gaussian");
integrator.addComputeGlobal("globalUniform1", "uniform");
integrator.addComputeGlobal("globalUniform2", "uniform");
integrator.addComputeGlobal("globalGaussian1", "gaussian");
integrator.addComputeGlobal("globalGaussian2", "gaussian");
Context context(system, integrator, platform);
// See if the random numbers are computed correctly.
vector<Vec3> values1, values2;
for (int i = 0; i < numSteps; i++) {
integrator.step(1);
integrator.getPerDofVariable(0, values1);
integrator.getPerDofVariable(1, values2);
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
double v1 = values1[i][j];
double v2 = values2[i][j];
ASSERT(v1 >= 0 && v1 < 1);
ASSERT(v2 >= 0 && v2 < 1);
ASSERT(v1 != v2);
}
integrator.getPerDofVariable(2, values1);
integrator.getPerDofVariable(3, values2);
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
double v1 = values1[i][j];
double v2 = values2[i][j];
ASSERT(v1 >= -10 && v1 < 10);
ASSERT(v2 >= -10 && v2 < 10);
ASSERT(v1 != v2);
}
double v1 = integrator.getGlobalVariable(0);
double v2 = integrator.getGlobalVariable(1);
ASSERT(v1 >= 0 && v1 < 1);
ASSERT(v2 >= 0 && v2 < 1);
ASSERT(v1 != v2);
v1 = integrator.getGlobalVariable(2);
v2 = integrator.getGlobalVariable(3);
ASSERT(v1 >= -10 && v1 < 10);
ASSERT(v2 >= -10 && v2 < 10);
ASSERT(v1 != v2);
}
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
if (argc > 1) if (argc > 1)
...@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) { ...@@ -666,6 +732,7 @@ int main(int argc, char* argv[]) {
testPerDofVariables(); testPerDofVariables();
testForceGroups(); testForceGroups();
testRespa(); testRespa();
testMergedRandoms();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment