Commit a163706b authored by Peter Eastman
Browse files

cudaThreadExit() was not being called, which caused failures on some platforms

parent 030ef272
...@@ -1692,11 +1692,8 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync) ...@@ -1692,11 +1692,8 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
int SMMinor = 0; int SMMinor = 0;
// Select which device to use // Select which device to use
int currentDevice; cudaError_t status = cudaSetDevice(device);
cudaError_t status = cudaGetDevice(&currentDevice); RTERROR(status, "Error setting CUDA device")
RTERROR(status, "Error getting CUDA device")
if (device != currentDevice)
cudaSetDevice(device); // Ignore errors
status = cudaGetDevice(&gpu->device); status = cudaGetDevice(&gpu->device);
RTERROR(status, "Error getting CUDA device") RTERROR(status, "Error getting CUDA device")
status = cudaSetDeviceFlags(useBlockingSync ? cudaDeviceBlockingSync : cudaDeviceScheduleAuto); status = cudaSetDeviceFlags(useBlockingSync ? cudaDeviceBlockingSync : cudaDeviceScheduleAuto);
...@@ -1705,7 +1702,7 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync) ...@@ -1705,7 +1702,7 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
// Determine kernel call configuration // Determine kernel call configuration
cudaDeviceProp deviceProp; cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, currentDevice); cudaGetDeviceProperties(&deviceProp, gpu->device);
// Determine SM version // Determine SM version
if (deviceProp.major == 1) if (deviceProp.major == 1)
...@@ -2089,6 +2086,7 @@ void gpuShutDown(gpuContext gpu) ...@@ -2089,6 +2086,7 @@ void gpuShutDown(gpuContext gpu)
// Wrap up // Wrap up
delete gpu; delete gpu;
cudaThreadExit();
return; return;
} }
......
...@@ -294,17 +294,27 @@ void testCoulombLennardJones() { ...@@ -294,17 +294,27 @@ void testCoulombLennardJones() {
customSystem.addForce(customNonbonded); customSystem.addForce(customNonbonded);
VerletIntegrator integrator1(0.01); VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01); VerletIntegrator integrator2(0.01);
Context context1(standardSystem, integrator1, platform); double energy1, energy2;
context1.setPositions(positions); vector<Vec3> forces1, forces2;
context1.setVelocities(velocities); {
State state1 = context1.getState(State::Forces | State::Energy); Context context(standardSystem, integrator1, platform);
Context context2(customSystem, integrator2, platform); context.setPositions(positions);
context2.setPositions(positions); context.setVelocities(velocities);
context2.setVelocities(velocities); State state = context.getState(State::Forces | State::Energy);
State state2 = context2.getState(State::Forces | State::Energy); energy1 = state.getPotentialEnergy();
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-4); forces1 = state.getForces();
}
{
Context context(customSystem, integrator2, platform);
context.setPositions(positions);
context.setVelocities(velocities);
State state = context.getState(State::Forces | State::Energy);
energy2 = state.getPotentialEnergy();
forces2 = state.getForces();
}
ASSERT_EQUAL_TOL(energy1, energy2, 1e-4);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-4); ASSERT_EQUAL_VEC(forces1[i], forces2[i], 1e-4);
} }
} }
......
...@@ -116,11 +116,11 @@ void testEwaldPME() { ...@@ -116,11 +116,11 @@ void testEwaldPME() {
Vec3 f = cudaState.getForces()[i]; Vec3 f = cudaState.getForces()[i];
positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step); positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
} }
Context cudaContext2(system, integrator, cuda); cudaContext.reinitialize();
cudaContext2.setPositions(positions); cudaContext.setPositions(positions);
tol = 1e-3; tol = 1e-3;
State cudaState2 = cudaContext2.getState(State::Energy); State cudaState2 = cudaContext.getState(State::Energy);
ASSERT_EQUAL_TOL(norm, (cudaState2.getPotentialEnergy()-cudaState.getPotentialEnergy())/delta, tol) ASSERT_EQUAL_TOL(norm, (cudaState2.getPotentialEnergy()-cudaState.getPotentialEnergy())/delta, tol)
// (3) Check whether the Reference and Cuda platforms agree when using PME // (3) Check whether the Reference and Cuda platforms agree when using PME
...@@ -154,11 +154,11 @@ void testEwaldPME() { ...@@ -154,11 +154,11 @@ void testEwaldPME() {
Vec3 f = cudaState.getForces()[i]; Vec3 f = cudaState.getForces()[i];
positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step); positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
} }
Context cudaContext3(system, integrator, cuda); cudaContext.reinitialize();
cudaContext3.setPositions(positions); cudaContext.setPositions(positions);
tol = 1e-3; tol = 1e-3;
State cudaState3 = cudaContext3.getState(State::Energy); State cudaState3 = cudaContext.getState(State::Energy);
ASSERT_EQUAL_TOL(norm, (cudaState3.getPotentialEnergy()-cudaState.getPotentialEnergy())/delta, tol) ASSERT_EQUAL_TOL(norm, (cudaState3.getPotentialEnergy()-cudaState.getPotentialEnergy())/delta, tol)
} }
......
...@@ -131,7 +131,7 @@ void testExclusionsAnd14() { ...@@ -131,7 +131,7 @@ void testExclusionsAnd14() {
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
// Test LJ forces // Test LJ forces
vector<Vec3> positions(5); vector<Vec3> positions(5);
const double r = 1.0; const double r = 1.0;
for (int j = 0; j < 5; ++j) { for (int j = 0; j < 5; ++j) {
...@@ -143,49 +143,55 @@ void testExclusionsAnd14() { ...@@ -143,49 +143,55 @@ void testExclusionsAnd14() {
nonbonded->setExceptionParameters(first14, 0, 3, 0, 1.5, i == 3 ? 0.5 : 0.0); nonbonded->setExceptionParameters(first14, 0, 3, 0, 1.5, i == 3 ? 0.5 : 0.0);
nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0.0); nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0.0);
positions[i] = Vec3(r, 0, 0); positions[i] = Vec3(r, 0, 0);
Context context(system, integrator, platform); // The following is in its own block, because CUDA can't deal with multiple Contexts
context.setPositions(positions); // existing on the same thread at the same time.
State state = context.getState(State::Forces | State::Energy); {
const vector<Vec3>& forces = state.getForces(); Context context(system, integrator, platform);
double x = 1.5/r; context.setPositions(positions);
double eps = 1.0; State state = context.getState(State::Forces | State::Energy);
double force = 4.0*eps*(12*std::pow(x, 12.0)-6*std::pow(x, 6.0))/r; const vector<Vec3>& forces = state.getForces();
double energy = 4.0*eps*(std::pow(x, 12.0)-std::pow(x, 6.0)); double x = 1.5/r;
if (i == 3) { double eps = 1.0;
force *= 0.5; double force = 4.0*eps*(12*std::pow(x, 12.0)-6*std::pow(x, 6.0))/r;
energy *= 0.5; double energy = 4.0*eps*(std::pow(x, 12.0)-std::pow(x, 6.0));
} if (i == 3) {
if (i < 3) { force *= 0.5;
force = 0; energy *= 0.5;
energy = 0; }
if (i < 3) {
force = 0;
energy = 0;
}
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[i], TOL);
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
} }
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[i], TOL);
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
// Test Coulomb forces // Test Coulomb forces
nonbonded->setParticleParameters(0, 2, 1.5, 0); {
nonbonded->setParticleParameters(i, 2, 1.5, 0); nonbonded->setParticleParameters(0, 2, 1.5, 0);
nonbonded->setExceptionParameters(first14, 0, 3, i == 3 ? 4/1.2 : 0, 1.5, 0); nonbonded->setParticleParameters(i, 2, 1.5, 0);
nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0); nonbonded->setExceptionParameters(first14, 0, 3, i == 3 ? 4/1.2 : 0, 1.5, 0);
Context context2(system, integrator, platform); nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0);
context2.setPositions(positions); Context context(system, integrator, platform);
state = context2.getState(State::Forces | State::Energy); context.setPositions(positions);
const vector<Vec3>& forces2 = state.getForces(); State state = context.getState(State::Forces | State::Energy);
force = ONE_4PI_EPS0*4/(r*r); const vector<Vec3>& forces2 = state.getForces();
energy = ONE_4PI_EPS0*4/r; double force = ONE_4PI_EPS0*4/(r*r);
if (i == 3) { double energy = ONE_4PI_EPS0*4/r;
force /= 1.2; if (i == 3) {
energy /= 1.2; force /= 1.2;
} energy /= 1.2;
if (i < 3) { }
force = 0; if (i < 3) {
energy = 0; force = 0;
energy = 0;
}
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces2[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces2[i], TOL);
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
} }
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces2[0], TOL);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces2[i], TOL);
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment