Commit cd6af26e authored by Peter Eastman
Browse files

RPMD supports mixed and double precision

parent b8b2e1ef
...@@ -91,7 +91,7 @@ private: ...@@ -91,7 +91,7 @@ private:
CudaArray* forces; CudaArray* forces;
CudaArray* positions; CudaArray* positions;
CudaArray* velocities; CudaArray* velocities;
CUfunction pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel; CUfunction pileKernel, stepKernel, velocitiesKernel, copyPositionsToContextKernel, copyVelocitiesToContextKernel, copyForcesFromContextKernel, translateKernel;
}; };
} // namespace OpenMM } // namespace OpenMM
......
__device__ float3 multiplyComplexRealPart(float2 c1, float3 c2r, float3 c2i) { __device__ mixed3 multiplyComplexRealPart(mixed2 c1, mixed3 c2r, mixed3 c2i) {
return c1.x*c2r-c1.y*c2i; return c1.x*c2r-c1.y*c2i;
} }
__device__ float3 multiplyComplexImagPart(float2 c1, float3 c2r, float3 c2i) { __device__ mixed3 multiplyComplexImagPart(mixed2 c1, mixed3 c2r, mixed3 c2i) {
return c1.x*c2i+c1.y*c2r; return c1.x*c2i+c1.y*c2r;
} }
__device__ float3 multiplyComplexRealPartConj(float2 c1, float3 c2r, float3 c2i) { __device__ mixed3 multiplyComplexRealPartConj(mixed2 c1, mixed3 c2r, mixed3 c2i) {
return c1.x*c2r+c1.y*c2i; return c1.x*c2r+c1.y*c2i;
} }
__device__ float3 multiplyComplexImagPartConj(float2 c1, float3 c2r, float3 c2i) { __device__ mixed3 multiplyComplexImagPartConj(mixed2 c1, mixed3 c2r, mixed3 c2i) {
return c1.x*c2i-c1.y*c2r; return c1.x*c2i-c1.y*c2r;
} }
/** /**
* Apply the PILE-L thermostat. * Apply the PILE-L thermostat.
*/ */
extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, unsigned int randomIndex, extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, unsigned int randomIndex,
float dt, float kT, float friction) { mixed dt, mixed kT, mixed friction) {
const int numBlocks = blockDim.x*gridDim.x/NUM_COPIES; const int numBlocks = blockDim.x*gridDim.x/NUM_COPIES;
const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES); const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES);
const int indexInBlock = threadIdx.x-blockStart; const int indexInBlock = threadIdx.x-blockStart;
const float nkT = NUM_COPIES*kT; const mixed nkT = NUM_COPIES*kT;
const float twown = 2.0f*nkT/HBAR; const mixed twown = 2.0f*nkT/HBAR;
const float c1_0 = EXP(-0.5f*dt*friction); const mixed c1_0 = EXP(-0.5f*dt*friction);
const float c2_0 = SQRT(1.0f-c1_0*c1_0); const mixed c2_0 = SQRT(1.0f-c1_0*c1_0);
__shared__ float3 v[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 v[2*THREAD_BLOCK_SIZE];
__shared__ float3 temp[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
__shared__ float2 w[NUM_COPIES]; __shared__ mixed2 w[NUM_COPIES];
float3* vreal = &v[blockStart]; mixed3* vreal = &v[blockStart];
float3* vimag = &v[blockStart+blockDim.x]; mixed3* vimag = &v[blockStart+blockDim.x];
if (threadIdx.x < NUM_COPIES) if (threadIdx.x < NUM_COPIES)
w[indexInBlock] = make_float2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); w[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
__syncthreads(); __syncthreads();
randomIndex += NUM_COPIES*((blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES); randomIndex += NUM_COPIES*((blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES);
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
float invMass = particleVelm.w; mixed invMass = particleVelm.w;
float c3_0 = c2_0*SQRT(nkT*invMass); mixed c3_0 = c2_0*SQRT(nkT*invMass);
// Forward FFT. // Forward FFT.
vreal[indexInBlock] = SCALE*make_float3(particleVelm.x, particleVelm.y, particleVelm.z); vreal[indexInBlock] = SCALE*make_mixed3(particleVelm.x, particleVelm.y, particleVelm.z);
vimag[indexInBlock] = make_float3(0); vimag[indexInBlock] = make_mixed3(0);
__syncthreads(); __syncthreads();
FFT_V_FORWARD FFT_V_FORWARD
...@@ -53,28 +53,28 @@ extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, uns ...@@ -53,28 +53,28 @@ extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, uns
// Apply a local Langevin thermostat to the centroid mode. // Apply a local Langevin thermostat to the centroid mode.
float4 rand = random[randomIndex]; float4 rand = random[randomIndex];
vreal[0] = vreal[0]*c1_0 + c3_0*make_float3(rand.x, rand.y, rand.z); vreal[0] = vreal[0]*c1_0 + c3_0*make_mixed3(rand.x, rand.y, rand.z);
} }
else { else {
// Use critical damping white noise for the remaining modes. // Use critical damping white noise for the remaining modes.
int k = (indexInBlock <= NUM_COPIES/2 ? indexInBlock : NUM_COPIES-indexInBlock); int k = (indexInBlock <= NUM_COPIES/2 ? indexInBlock : NUM_COPIES-indexInBlock);
const bool isCenter = (NUM_COPIES%2 == 0 && k == NUM_COPIES/2); const bool isCenter = (NUM_COPIES%2 == 0 && k == NUM_COPIES/2);
const float wk = twown*sin(k*M_PI/NUM_COPIES); const mixed wk = twown*sin(k*M_PI/NUM_COPIES);
const float c1 = EXP(-wk*dt); const mixed c1 = EXP(-wk*dt);
const float c2 = SQRT((1.0f-c1*c1)/2.0f) * (isCenter ? sqrt(2.0f) : 1.0f); const mixed c2 = SQRT((1.0f-c1*c1)/2.0f) * (isCenter ? sqrt(2.0f) : 1.0f);
const float c3 = c2*SQRT(nkT*invMass); const mixed c3 = c2*SQRT(nkT*invMass);
float4 rand1 = c3*random[randomIndex+k]; float4 rand1 = random[randomIndex+k];
float4 rand2 = (isCenter ? make_float4(0) : c3*random[randomIndex+NUM_COPIES-k]); float4 rand2 = (isCenter ? make_float4(0) : random[randomIndex+NUM_COPIES-k]);
vreal[indexInBlock] = c1*vreal[indexInBlock] + make_float3(rand1.x, rand1.y, rand1.z); vreal[indexInBlock] = c1*vreal[indexInBlock] + c3*make_mixed3(rand1.x, rand1.y, rand1.z);
vimag[indexInBlock] = c1*vimag[indexInBlock] + (indexInBlock < NUM_COPIES/2 ? make_float3(rand2.x, rand2.y, rand2.z) : make_float3(-rand2.x, -rand2.y, -rand2.z)); vimag[indexInBlock] = c1*vimag[indexInBlock] + c3*(indexInBlock < NUM_COPIES/2 ? make_mixed3(rand2.x, rand2.y, rand2.z) : make_mixed3(-rand2.x, -rand2.y, -rand2.z));
} }
__syncthreads(); __syncthreads();
// Inverse FFT. // Inverse FFT.
FFT_V_BACKWARD FFT_V_BACKWARD
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_float4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w); velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
randomIndex += blockDim.x*gridDim.x; randomIndex += blockDim.x*gridDim.x;
} }
} }
...@@ -82,24 +82,24 @@ extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, uns ...@@ -82,24 +82,24 @@ extern "C" __global__ void applyPileThermostat(float4* velm, float4* random, uns
/** /**
* Advance the positions and velocities. * Advance the positions and velocities.
*/ */
extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long* force, float dt, float kT) { extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long* force, mixed dt, mixed kT) {
const int numBlocks = (blockDim.x*gridDim.x)/NUM_COPIES; const int numBlocks = (blockDim.x*gridDim.x)/NUM_COPIES;
const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES); const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES);
const int indexInBlock = threadIdx.x-blockStart; const int indexInBlock = threadIdx.x-blockStart;
const float nkT = NUM_COPIES*kT; const mixed nkT = NUM_COPIES*kT;
const float twown = 2.0f*nkT/HBAR; const mixed twown = 2.0f*nkT/HBAR;
const float forceScale = 1/(float) 0xFFFFFFFF; const mixed forceScale = 1/(mixed) 0xFFFFFFFF;
__shared__ float3 q[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 q[2*THREAD_BLOCK_SIZE];
__shared__ float3 v[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 v[2*THREAD_BLOCK_SIZE];
__shared__ float3 temp[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
__shared__ float2 w[NUM_COPIES]; __shared__ mixed2 w[NUM_COPIES];
// Update velocities. // Update velocities.
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS; int index = particle+indexInBlock*PADDED_NUM_ATOMS;
int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3; int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3;
float4 particleVelm = velm[index]; mixed4 particleVelm = velm[index];
particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w); particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w);
particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w); particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w);
particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w); particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w);
...@@ -108,23 +108,23 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long* ...@@ -108,23 +108,23 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long*
// Evolve the free ring polymer by transforming to the frequency domain. // Evolve the free ring polymer by transforming to the frequency domain.
float3* qreal = &q[blockStart]; mixed3* qreal = &q[blockStart];
float3* qimag = &q[blockStart+blockDim.x]; mixed3* qimag = &q[blockStart+blockDim.x];
float3* vreal = &v[blockStart]; mixed3* vreal = &v[blockStart];
float3* vimag = &v[blockStart+blockDim.x]; mixed3* vimag = &v[blockStart+blockDim.x];
if (threadIdx.x < NUM_COPIES) if (threadIdx.x < NUM_COPIES)
w[indexInBlock] = make_float2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); w[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
__syncthreads(); __syncthreads();
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
float4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
// Forward FFT. // Forward FFT.
qreal[indexInBlock] = SCALE*make_float3(particlePosq.x, particlePosq.y, particlePosq.z); qreal[indexInBlock] = SCALE*make_mixed3(particlePosq.x, particlePosq.y, particlePosq.z);
qimag[indexInBlock] = make_float3(0); qimag[indexInBlock] = make_mixed3(0);
vreal[indexInBlock] = SCALE*make_float3(particleVelm.x, particleVelm.y, particleVelm.z); vreal[indexInBlock] = SCALE*make_mixed3(particleVelm.x, particleVelm.y, particleVelm.z);
vimag[indexInBlock] = make_float3(0); vimag[indexInBlock] = make_mixed3(0);
__syncthreads(); __syncthreads();
FFT_Q_FORWARD FFT_Q_FORWARD
FFT_V_FORWARD FFT_V_FORWARD
...@@ -136,12 +136,12 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long* ...@@ -136,12 +136,12 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long*
qimag[0] += vimag[0]*dt; qimag[0] += vimag[0]*dt;
} }
else { else {
const float wk = twown*sin(indexInBlock*M_PI/NUM_COPIES); const mixed wk = twown*sin(indexInBlock*M_PI/NUM_COPIES);
const float wt = wk*dt; const mixed wt = wk*dt;
const float coswt = cos(wt); const mixed coswt = cos(wt);
const float sinwt = sin(wt); const mixed sinwt = sin(wt);
const float3 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt const mixed3 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt
const float3 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt); const mixed3 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt);
qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt
qimag[indexInBlock] = vimag[indexInBlock]*(sinwt/wk) + qimag[indexInBlock]*coswt; qimag[indexInBlock] = vimag[indexInBlock]*(sinwt/wk) + qimag[indexInBlock]*coswt;
vreal[indexInBlock] = vprimereal; vreal[indexInBlock] = vprimereal;
...@@ -153,26 +153,26 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long* ...@@ -153,26 +153,26 @@ extern "C" __global__ void integrateStep(float4* posq, float4* velm, long long*
FFT_Q_BACKWARD FFT_Q_BACKWARD
FFT_V_BACKWARD FFT_V_BACKWARD
posq[particle+indexInBlock*PADDED_NUM_ATOMS] = make_float4(SCALE*qreal[indexInBlock].x, SCALE*qreal[indexInBlock].y, SCALE*qreal[indexInBlock].z, particlePosq.w); posq[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*qreal[indexInBlock].x, SCALE*qreal[indexInBlock].y, SCALE*qreal[indexInBlock].z, particlePosq.w);
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_float4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w); velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
} }
} }
/** /**
* Advance the velocities by a half step. * Advance the velocities by a half step.
*/ */
extern "C" __global__ void advanceVelocities(float4* velm, long long* force, float dt) { extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mixed dt) {
const int numBlocks = (blockDim.x*gridDim.x)/NUM_COPIES; const int numBlocks = (blockDim.x*gridDim.x)/NUM_COPIES;
const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES); const int blockStart = NUM_COPIES*(threadIdx.x/NUM_COPIES);
const int indexInBlock = threadIdx.x-blockStart; const int indexInBlock = threadIdx.x-blockStart;
const float forceScale = 1/(float) 0xFFFFFFFF; const mixed forceScale = 1/(mixed) 0xFFFFFFFF;
// Update velocities. // Update velocities.
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS; int index = particle+indexInBlock*PADDED_NUM_ATOMS;
int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3; int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3;
float4 particleVelm = velm[index]; mixed4 particleVelm = velm[index];
particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w); particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w);
particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w); particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w);
particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w); particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w);
...@@ -181,9 +181,20 @@ extern "C" __global__ void advanceVelocities(float4* velm, long long* force, flo ...@@ -181,9 +181,20 @@ extern "C" __global__ void advanceVelocities(float4* velm, long long* force, flo
} }
/** /**
* Copy a set of per-atom values from the integrator's arrays to the context. * Copy a set of positions from the integrator's arrays to the context.
*/ */
extern "C" __global__ void copyToContext(float4* src, float4* dst, int* order, int copy) { extern "C" __global__ void copyPositionsToContext(mixed4* src, real4* dst, int* order, int copy) {
const int base = copy*PADDED_NUM_ATOMS;
for (int particle = blockIdx.x*blockDim.x+threadIdx.x; particle < NUM_ATOMS; particle += blockDim.x*gridDim.x) {
mixed4 posq = src[base+order[particle]];
dst[particle] = make_real4(posq.x, posq.y, posq.z, posq.w);
}
}
/**
* Copy a set of velocities from the integrator's arrays to the context.
*/
extern "C" __global__ void copyVelocitiesToContext(mixed4* src, mixed4* dst, int* order, int copy) {
const int base = copy*PADDED_NUM_ATOMS; const int base = copy*PADDED_NUM_ATOMS;
for (int particle = blockIdx.x*blockDim.x+threadIdx.x; particle < NUM_ATOMS; particle += blockDim.x*gridDim.x) { for (int particle = blockIdx.x*blockDim.x+threadIdx.x; particle < NUM_ATOMS; particle += blockDim.x*gridDim.x) {
dst[particle] = src[base+order[particle]]; dst[particle] = src[base+order[particle]];
...@@ -191,9 +202,9 @@ extern "C" __global__ void copyToContext(float4* src, float4* dst, int* order, i ...@@ -191,9 +202,9 @@ extern "C" __global__ void copyToContext(float4* src, float4* dst, int* order, i
} }
/** /**
* Copy a set of per-atom force values from the context to the integrator's arrays. * Copy a set of forces from the context to the integrator's arrays.
*/ */
extern "C" __global__ void copyFromContext(long long* src, long long* dst, int* order, int copy) { extern "C" __global__ void copyForcesFromContext(long long* src, long long* dst, int* order, int copy) {
const int base = copy*PADDED_NUM_ATOMS*3; const int base = copy*PADDED_NUM_ATOMS*3;
for (int particle = blockIdx.x*blockDim.x+threadIdx.x; particle < NUM_ATOMS; particle += blockDim.x*gridDim.x) { for (int particle = blockIdx.x*blockDim.x+threadIdx.x; particle < NUM_ATOMS; particle += blockDim.x*gridDim.x) {
dst[base+order[particle]] = src[particle]; dst[base+order[particle]] = src[particle];
...@@ -205,10 +216,11 @@ extern "C" __global__ void copyFromContext(long long* src, long long* dst, int* ...@@ -205,10 +216,11 @@ extern "C" __global__ void copyFromContext(long long* src, long long* dst, int*
/** /**
* Update atom positions so all copies are offset by the same number of periodic box widths. * Update atom positions so all copies are offset by the same number of periodic box widths.
*/ */
extern "C" __global__ void applyCellTranslations(float4* posq, float4* movedPos, int* order, int movedCopy) { extern "C" __global__ void applyCellTranslations(mixed4* posq, real4* movedPos, int* order, int movedCopy) {
for (int particle = blockIdx.x*blockDim.x+threadIdx.x; particle < NUM_ATOMS; particle += blockDim.x*gridDim.x) { for (int particle = blockIdx.x*blockDim.x+threadIdx.x; particle < NUM_ATOMS; particle += blockDim.x*gridDim.x) {
int index = order[particle]; int index = order[particle];
float4 delta = movedPos[particle]-posq[movedCopy*PADDED_NUM_ATOMS+index]; real4 p = movedPos[particle];
mixed4 delta = make_mixed4(p.x, p.y, p.z, p.w)-posq[movedCopy*PADDED_NUM_ATOMS+index];
for (int copy = 0; copy < NUM_COPIES; copy++) for (int copy = 0; copy < NUM_COPIES; copy++)
posq[copy*PADDED_NUM_ATOMS+index] += delta; posq[copy*PADDED_NUM_ATOMS+index] += delta;
} }
......
...@@ -14,6 +14,10 @@ FOREACH(TEST_PROG ${TEST_PROGS}) ...@@ -14,6 +14,10 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library # Link with shared library
ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG}) ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_RPMD_TARGET}) TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_RPMD_TARGET})
ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT}) ADD_TEST(${TEST_ROOT}Single ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single)
IF (OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS)
ADD_TEST(${TEST_ROOT}Mixed ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} mixed)
ADD_TEST(${TEST_ROOT}Double ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} double)
ENDIF(OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS)
ENDFOREACH(TEST_PROG ${TEST_PROGS}) ENDFOREACH(TEST_PROG ${TEST_PROGS})
...@@ -165,7 +165,6 @@ void testParaHydrogen() { ...@@ -165,7 +165,6 @@ void testParaHydrogen() {
vector<int> counts(numBins, 0); vector<int> counts(numBins, 0);
const double invBoxSize = 1.0/boxSize; const double invBoxSize = 1.0/boxSize;
double meanKE = 0.0; double meanKE = 0.0;
const RealOpenMM hbar = 1.054571628e-34*AVOGADRO/(1000*1e-12);
for (int step = 0; step < numSteps; step++) { for (int step = 0; step < numSteps; step++) {
integ.step(20); integ.step(20);
vector<State> states(numCopies); vector<State> states(numCopies);
...@@ -221,9 +220,11 @@ void testParaHydrogen() { ...@@ -221,9 +220,11 @@ void testParaHydrogen() {
ASSERT_USUALLY_EQUAL_TOL(60.0, 1.5*temperature+meanKE, 0.02); ASSERT_USUALLY_EQUAL_TOL(60.0, 1.5*temperature+meanKE, 0.02);
} }
int main() { int main(int argc, char* argv[]) {
try { try {
Platform::loadPluginsFromDirectory(Platform::getDefaultPluginsDirectory()); Platform::loadPluginsFromDirectory(Platform::getDefaultPluginsDirectory());
if (argc > 1)
Platform::getPlatformByName("CUDA").setPropertyDefaultValue("CudaPrecision", string(argv[1]));
testFreeParticles(); testFreeParticles();
testParaHydrogen(); testParaHydrogen();
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011 Stanford University and the Authors. * * Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -92,7 +92,7 @@ private: ...@@ -92,7 +92,7 @@ private:
OpenCLArray* forces; OpenCLArray* forces;
OpenCLArray* positions; OpenCLArray* positions;
OpenCLArray* velocities; OpenCLArray* velocities;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel; cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyPositionsToContextKernel, copyVelocitiesToContextKernel, copyForcesFromContextKernel, translateKernel;
}; };
} // namespace OpenMM } // namespace OpenMM
......
float4 multiplyComplexRealPart(float2 c1, float4 c2r, float4 c2i) { mixed4 multiplyComplexRealPart(mixed2 c1, mixed4 c2r, mixed4 c2i) {
return c1.x*c2r-c1.y*c2i; return c1.x*c2r-c1.y*c2i;
} }
float4 multiplyComplexImagPart(float2 c1, float4 c2r, float4 c2i) { mixed4 multiplyComplexImagPart(mixed2 c1, mixed4 c2r, mixed4 c2i) {
return c1.x*c2i+c1.y*c2r; return c1.x*c2i+c1.y*c2r;
} }
float4 multiplyComplexRealPartConj(float2 c1, float4 c2r, float4 c2i) { mixed4 multiplyComplexRealPartConj(mixed2 c1, mixed4 c2r, mixed4 c2i) {
return c1.x*c2r+c1.y*c2i; return c1.x*c2r+c1.y*c2i;
} }
float4 multiplyComplexImagPartConj(float2 c1, float4 c2r, float4 c2i) { mixed4 multiplyComplexImagPartConj(mixed2 c1, mixed4 c2r, mixed4 c2i) {
return c1.x*c2i-c1.y*c2r; return c1.x*c2i-c1.y*c2r;
} }
/** /**
* Apply the PILE-L thermostat. * Apply the PILE-L thermostat.
*/ */
__kernel void applyPileThermostat(__global float4* velm, __local float4* v, __local float4* temp, __local float2* w, __global float4* random, unsigned int randomIndex, __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random, unsigned int randomIndex,
float dt, float kT, float friction) { mixed dt, mixed kT, mixed friction) {
const int numBlocks = get_global_size(0)/NUM_COPIES; const int numBlocks = get_global_size(0)/NUM_COPIES;
const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES); const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
const int indexInBlock = get_local_id(0)-blockStart; const int indexInBlock = get_local_id(0)-blockStart;
const float nkT = NUM_COPIES*kT; const mixed nkT = NUM_COPIES*kT;
const float twown = 2.0f*nkT/HBAR; const mixed twown = 2.0f*nkT/HBAR;
const float c1_0 = EXP(-0.5f*dt*friction); const mixed c1_0 = exp(-0.5f*dt*friction);
const float c2_0 = SQRT(1.0f-c1_0*c1_0); const mixed c2_0 = sqrt(1.0f-c1_0*c1_0);
__local float4* vreal = &v[blockStart]; __local mixed4 v[2*THREAD_BLOCK_SIZE];
__local float4* vimag = &v[blockStart+get_local_size(0)]; __local mixed4 temp[2*THREAD_BLOCK_SIZE];
__local mixed2 w[NUM_COPIES];
__local mixed4* vreal = &v[blockStart];
__local mixed4* vimag = &v[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES) if (get_local_id(0) < NUM_COPIES)
w[indexInBlock] = (float2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
randomIndex += NUM_COPIES*(get_global_id(0)/NUM_COPIES); randomIndex += NUM_COPIES*(get_global_id(0)/NUM_COPIES);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
float invMass = particleVelm.w; mixed invMass = particleVelm.w;
float c3_0 = c2_0*SQRT(nkT*invMass); mixed c3_0 = c2_0*sqrt(nkT*invMass);
// Forward FFT. // Forward FFT.
vreal[indexInBlock] = SCALE*particleVelm; vreal[indexInBlock] = SCALE*particleVelm;
vimag[indexInBlock] = (float4) (0.0f, 0.0f, 0.0f, 0.0f); vimag[indexInBlock] = (mixed4) (0.0f, 0.0f, 0.0f, 0.0f);
barrier(CLK_GLOBAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
FFT_V_FORWARD FFT_V_FORWARD
...@@ -49,19 +52,19 @@ __kernel void applyPileThermostat(__global float4* velm, __local float4* v, __lo ...@@ -49,19 +52,19 @@ __kernel void applyPileThermostat(__global float4* velm, __local float4* v, __lo
if (indexInBlock == 0) { if (indexInBlock == 0) {
// Apply a local Langevin thermostat to the centroid mode. // Apply a local Langevin thermostat to the centroid mode.
vreal[0].xyz = vreal[0].xyz*c1_0 + c3_0*random[randomIndex].xyz; vreal[0].xyz = vreal[0].xyz*c1_0 + c3_0*convert_mixed4(random[randomIndex]).xyz;
} }
else { else {
// Use critical damping white noise for the remaining modes. // Use critical damping white noise for the remaining modes.
int k = (indexInBlock <= NUM_COPIES/2 ? indexInBlock : NUM_COPIES-indexInBlock); int k = (indexInBlock <= NUM_COPIES/2 ? indexInBlock : NUM_COPIES-indexInBlock);
const bool isCenter = (NUM_COPIES%2 == 0 && k == NUM_COPIES/2); const bool isCenter = (NUM_COPIES%2 == 0 && k == NUM_COPIES/2);
const float wk = twown*sin(k*M_PI/NUM_COPIES); const mixed wk = twown*sin(k*M_PI/NUM_COPIES);
const float c1 = EXP(-wk*dt); const mixed c1 = exp(-wk*dt);
const float c2 = SQRT((1.0f-c1*c1)/2.0f) * (isCenter ? sqrt(2.0f) : 1.0f); const mixed c2 = sqrt((1.0f-c1*c1)/2.0f) * (isCenter ? sqrt(2.0f) : 1.0f);
const float c3 = c2*SQRT(nkT*invMass); const mixed c3 = c2*sqrt(nkT*invMass);
float4 rand1 = c3*random[randomIndex+k]; mixed4 rand1 = c3*convert_mixed4(random[randomIndex+k]);
float4 rand2 = (isCenter ? 0.0f : c3*random[randomIndex+NUM_COPIES-k]); mixed4 rand2 = (isCenter ? 0.0f : c3*convert_mixed4(random[randomIndex+NUM_COPIES-k]));
vreal[indexInBlock].xyz = c1*vreal[indexInBlock].xyz + rand1.xyz; vreal[indexInBlock].xyz = c1*vreal[indexInBlock].xyz + rand1.xyz;
vimag[indexInBlock].xyz = c1*vimag[indexInBlock].xyz + (indexInBlock < NUM_COPIES/2 ? rand2.xyz : -rand2.xyz); vimag[indexInBlock].xyz = c1*vimag[indexInBlock].xyz + (indexInBlock < NUM_COPIES/2 ? rand2.xyz : -rand2.xyz);
} }
...@@ -78,42 +81,45 @@ __kernel void applyPileThermostat(__global float4* velm, __local float4* v, __lo ...@@ -78,42 +81,45 @@ __kernel void applyPileThermostat(__global float4* velm, __local float4* v, __lo
/** /**
* Advance the positions and velocities. * Advance the positions and velocities.
*/ */
__kernel void integrateStep(__global float4* posq, __global float4* velm, __global float4* force, __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __global real4* force, mixed dt, mixed kT) {
__local float4* q, __local float4* v, __local float4* temp, __local float2* w, float dt, float kT) {
const int numBlocks = get_global_size(0)/NUM_COPIES; const int numBlocks = get_global_size(0)/NUM_COPIES;
const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES); const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
const int indexInBlock = get_local_id(0)-blockStart; const int indexInBlock = get_local_id(0)-blockStart;
const float nkT = NUM_COPIES*kT; const mixed nkT = NUM_COPIES*kT;
const float twown = 2.0f*nkT/HBAR; const mixed twown = 2.0f*nkT/HBAR;
__local mixed4 q[2*THREAD_BLOCK_SIZE];
__local mixed4 v[2*THREAD_BLOCK_SIZE];
__local mixed4 temp[2*THREAD_BLOCK_SIZE];
__local mixed2 w[NUM_COPIES];
// Update velocities. // Update velocities.
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS; int index = particle+indexInBlock*PADDED_NUM_ATOMS;
float4 particleVelm = velm[index]; mixed4 particleVelm = velm[index];
particleVelm.xyz += force[index].xyz*(0.5f*dt*particleVelm.w); particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w);
velm[index] = particleVelm; velm[index] = particleVelm;
} }
// Evolve the free ring polymer by transforming to the frequency domain. // Evolve the free ring polymer by transforming to the frequency domain.
__local float4* qreal = &q[blockStart]; __local mixed4* qreal = &q[blockStart];
__local float4* qimag = &q[blockStart+get_local_size(0)]; __local mixed4* qimag = &q[blockStart+get_local_size(0)];
__local float4* vreal = &v[blockStart]; __local mixed4* vreal = &v[blockStart];
__local float4* vimag = &v[blockStart+get_local_size(0)]; __local mixed4* vimag = &v[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES) if (get_local_id(0) < NUM_COPIES)
w[indexInBlock] = (float2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
float4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
// Forward FFT. // Forward FFT.
qreal[indexInBlock] = SCALE*particlePosq; qreal[indexInBlock] = SCALE*particlePosq;
qimag[indexInBlock] = (float4) (0.0f, 0.0f, 0.0f, 0.0f); qimag[indexInBlock] = (mixed4) (0.0f, 0.0f, 0.0f, 0.0f);
vreal[indexInBlock] = SCALE*particleVelm; vreal[indexInBlock] = SCALE*particleVelm;
vimag[indexInBlock] = (float4) (0.0f, 0.0f, 0.0f, 0.0f); vimag[indexInBlock] = (mixed4) (0.0f, 0.0f, 0.0f, 0.0f);
barrier(CLK_GLOBAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
FFT_Q_FORWARD FFT_Q_FORWARD
FFT_V_FORWARD FFT_V_FORWARD
...@@ -125,12 +131,12 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob ...@@ -125,12 +131,12 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
qimag[0].xyz += vimag[0].xyz*dt; qimag[0].xyz += vimag[0].xyz*dt;
} }
else { else {
const float wk = twown*sin(indexInBlock*M_PI/NUM_COPIES); const mixed wk = twown*sin(indexInBlock*M_PI/NUM_COPIES);
const float wt = wk*dt; const mixed wt = wk*dt;
const float coswt = cos(wt); const mixed coswt = cos(wt);
const float sinwt = sin(wt); const mixed sinwt = sin(wt);
const float4 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt const mixed4 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt
const float4 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt); const mixed4 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt);
qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt
qimag[indexInBlock] = vimag[indexInBlock]*(sinwt/wk) + qimag[indexInBlock]*coswt; qimag[indexInBlock] = vimag[indexInBlock]*(sinwt/wk) + qimag[indexInBlock]*coswt;
vreal[indexInBlock] = vprimereal; vreal[indexInBlock] = vprimereal;
...@@ -150,7 +156,7 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob ...@@ -150,7 +156,7 @@ __kernel void integrateStep(__global float4* posq, __global float4* velm, __glob
/** /**
* Advance the velocities by a half step. * Advance the velocities by a half step.
*/ */
__kernel void advanceVelocities(__global float4* velm, __global float4* force, float dt) { __kernel void advanceVelocities(__global mixed4* velm, __global real4* force, mixed dt) {
const int numBlocks = get_global_size(0)/NUM_COPIES; const int numBlocks = get_global_size(0)/NUM_COPIES;
const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES); const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
const int indexInBlock = get_local_id(0)-blockStart; const int indexInBlock = get_local_id(0)-blockStart;
...@@ -159,16 +165,26 @@ __kernel void advanceVelocities(__global float4* velm, __global float4* force, f ...@@ -159,16 +165,26 @@ __kernel void advanceVelocities(__global float4* velm, __global float4* force, f
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS; int index = particle+indexInBlock*PADDED_NUM_ATOMS;
float4 particleVelm = velm[index]; mixed4 particleVelm = velm[index];
particleVelm.xyz += force[index].xyz*(0.5f*dt*particleVelm.w); particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w);
velm[index] = particleVelm; velm[index] = particleVelm;
} }
} }
/** /**
* Copy a set of per-atom values from the integrator's arrays to the context. * Copy a set of positions from the integrator's arrays to the context.
*/ */
__kernel void copyToContext(__global float4* src, __global float4* dst, __global int* order, int copy) { __kernel void copyPositionsToContext(__global mixed4* src, __global real4* dst, __global int* order, int copy) {
const int base = copy*PADDED_NUM_ATOMS;
for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) {
dst[particle] = convert_real4(src[base+order[particle]]);
}
}
/**
* Copy a set of velocities from the integrator's arrays to the context.
*/
__kernel void copyVelocitiesToContext(__global mixed4* src, __global mixed4* dst, __global int* order, int copy) {
const int base = copy*PADDED_NUM_ATOMS; const int base = copy*PADDED_NUM_ATOMS;
for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) { for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) {
dst[particle] = src[base+order[particle]]; dst[particle] = src[base+order[particle]];
...@@ -176,9 +192,9 @@ __kernel void copyToContext(__global float4* src, __global float4* dst, __global ...@@ -176,9 +192,9 @@ __kernel void copyToContext(__global float4* src, __global float4* dst, __global
} }
/** /**
* Copy a set of per-atom values from the context to the integrator's arrays. * Copy a set of forces from the context to the integrator's arrays.
*/ */
__kernel void copyFromContext(__global float4* src, __global float4* dst, __global int* order, int copy) { __kernel void copyForcesFromContext(__global real4* src, __global real4* dst, __global int* order, int copy) {
const int base = copy*PADDED_NUM_ATOMS; const int base = copy*PADDED_NUM_ATOMS;
for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) { for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) {
dst[base+order[particle]] = src[particle]; dst[base+order[particle]] = src[particle];
...@@ -188,10 +204,10 @@ __kernel void copyFromContext(__global float4* src, __global float4* dst, __glob ...@@ -188,10 +204,10 @@ __kernel void copyFromContext(__global float4* src, __global float4* dst, __glob
/** /**
* Update atom positions so all copies are offset by the same number of periodic box widths. * Update atom positions so all copies are offset by the same number of periodic box widths.
*/ */
__kernel void applyCellTranslations(__global float4* posq, __global float4* movedPos, __global int* order, int movedCopy) { __kernel void applyCellTranslations(__global mixed4* posq, __global real4* movedPos, __global int* order, int movedCopy) {
for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) { for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) {
int index = order[particle]; int index = order[particle];
float4 delta = movedPos[particle]-posq[movedCopy*PADDED_NUM_ATOMS+index]; mixed4 delta = convert_mixed4(movedPos[particle])-posq[movedCopy*PADDED_NUM_ATOMS+index];
for (int copy = 0; copy < NUM_COPIES; copy++) for (int copy = 0; copy < NUM_COPIES; copy++)
posq[copy*PADDED_NUM_ATOMS+index] += delta; posq[copy*PADDED_NUM_ATOMS+index] += delta;
} }
......
...@@ -14,6 +14,10 @@ FOREACH(TEST_PROG ${TEST_PROGS}) ...@@ -14,6 +14,10 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library # Link with shared library
ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG}) ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_RPMD_TARGET}) TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_RPMD_TARGET})
ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT}) ADD_TEST(${TEST_ROOT}Single ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single)
IF (OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS)
ADD_TEST(${TEST_ROOT}Mixed ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} mixed)
ADD_TEST(${TEST_ROOT}Double ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} double)
ENDIF(OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS)
ENDFOREACH(TEST_PROG ${TEST_PROGS}) ENDFOREACH(TEST_PROG ${TEST_PROGS})
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011 Stanford University and the Authors. * * Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -221,9 +221,11 @@ void testParaHydrogen() { ...@@ -221,9 +221,11 @@ void testParaHydrogen() {
ASSERT_USUALLY_EQUAL_TOL(60.0, 1.5*temperature+meanKE, 0.02); ASSERT_USUALLY_EQUAL_TOL(60.0, 1.5*temperature+meanKE, 0.02);
} }
int main() { int main(int argc, char* argv[]) {
try { try {
Platform::loadPluginsFromDirectory(Platform::getDefaultPluginsDirectory()); Platform::loadPluginsFromDirectory(Platform::getDefaultPluginsDirectory());
if (argc > 1)
Platform::getPlatformByName("OpenCL").setPropertyDefaultValue("OpenCLPrecision", string(argv[1]));
testFreeParticles(); testFreeParticles();
testParaHydrogen(); testParaHydrogen();
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment