Commit 06089c9a authored by Mark Friedrichs
Browse files

No substantive changes -- cleanup

parent 4d6895b0
...@@ -1642,9 +1642,10 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect ...@@ -1642,9 +1642,10 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
} }
} }
if( 0 && amoebaGpu->log ) if( 0 && amoebaGpu->log ){
fprintf( amoebaGpu->log, "Z1 %4d %d [%4d %4d %4d]\n", ii, axisType[ii], fprintf( amoebaGpu->log, "Z1 %4d %d [%4d %4d %4d] dmp/thole %15.7e %15.7e\n", ii, axisType[ii],
multipoleParticleX[ii], multipoleParticleY[ii], multipoleParticleZ[ii] ); multipoleParticleX[ii], multipoleParticleY[ii], multipoleParticleZ[ii], dampingFactors[ii], tholes[ii] );
}
// charges // charges
......
...@@ -513,17 +513,9 @@ static __device__ void loadElectrostaticParticle( struct ElectrostaticParticle* ...@@ -513,17 +513,9 @@ static __device__ void loadElectrostaticParticle( struct ElectrostaticParticle*
} }
static __device__ void zeroElectrostaticParticle( struct ElectrostaticParticle* sA ){ static __device__ void zeroElectrostaticParticle( struct ElectrostaticParticle* sA ){
// coordinates & charge
sA->force[0] = 0.0f; sA->force[0] = 0.0f;
sA->force[1] = 0.0f; sA->force[1] = 0.0f;
sA->force[2] = 0.0f; sA->force[2] = 0.0f;
/*
sA->torque[0] = 0.0f;
sA->torque[1] = 0.0f;
sA->torque[2] = 0.0f;
*/
} }
#undef SUB_METHOD_NAME #undef SUB_METHOD_NAME
...@@ -649,7 +641,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo ...@@ -649,7 +641,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
CUDAStream<float4>* debugArray = new CUDAStream<float4>(maxSlots*paddedNumberOfAtoms, 1, "DebugArray"); CUDAStream<float4>* debugArray = new CUDAStream<float4>(maxSlots*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysData, 0, sizeof( float )*4*maxSlots*paddedNumberOfAtoms); memset( debugArray->_pSysData, 0, sizeof( float )*4*maxSlots*paddedNumberOfAtoms);
debugArray->Upload(); debugArray->Upload();
unsigned int targetAtom = 237; //unsigned int targetAtom = 1137;
unsigned int targetAtom = 1;
#endif #endif
// on first pass, set threads/block // on first pass, set threads/block
...@@ -675,8 +668,14 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo ...@@ -675,8 +668,14 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp, gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp,
sizeof(ElectrostaticParticle), sizeof(ElectrostaticParticle)*threadsPerBlock, (*gpu->psInteractionCount)[0], gpu->sim.workUnits ); (void) fflush( amoebaGpu->log ); sizeof(ElectrostaticParticle), sizeof(ElectrostaticParticle)*threadsPerBlock, (*gpu->psInteractionCount)[0], gpu->sim.workUnits ); (void) fflush( amoebaGpu->log );
} }
#endif if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData, debugArray->_pDevData, targetAtom );
} else {
kCalculateAmoebaCudaElectrostaticN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData, debugArray->_pDevData, targetAtom );
}
#else
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>( kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData ); gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData );
...@@ -684,8 +683,44 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo ...@@ -684,8 +683,44 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
kCalculateAmoebaCudaElectrostaticN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>( kCalculateAmoebaCudaElectrostaticN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData ); gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData );
} }
#endif
LAUNCHERROR("kCalculateAmoebaCudaElectrostaticN2Forces"); LAUNCHERROR("kCalculateAmoebaCudaElectrostaticN2Forces");
#ifdef AMOEBA_DEBUG
if( 0 ){
debugArray->Download();
std::vector<double> conversions;
conversions.push_back( 0.1f/4.184f );
conversions.push_back( 0.1f/4.184f );
unsigned int kkBlocks = 4;
(void) fprintf( stderr, "\nTarget atom output %5u\n", targetAtom );
for( unsigned int ii = 0; ii < amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; ii++ ){
double sum = 0.0;
for( unsigned int kk = 0; kk < kkBlocks && sum == 0.0; kk++ ){
unsigned int index = ii + kk*amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
sum += debugArray->_pSysData[index].x + debugArray->_pSysData[index].y + debugArray->_pSysData[index].z + debugArray->_pSysData[index].w;
}
if( sum > 0.0 ){
(void) fprintf( stderr, "%5u", ii );
for( unsigned int kk = 0; kk < kkBlocks; kk++ ){
unsigned int index = ii + kk*amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
(void) fprintf( stderr, " %15.7e %15.7e %15.7e %5.1f",
conversions[kk]*debugArray->_pSysData[index].x, conversions[kk]*debugArray->_pSysData[index].y, conversions[kk]*debugArray->_pSysData[index].z,
debugArray->_pSysData[index].w );
if( ((kk+1) % 2) == 0 && (kk != (kkBlocks-1) ) ){
(void) fprintf( stderr, "\n%5u", ii );
}
}
(void) fprintf( stderr, "\n" );
if( kkBlocks > 2 ){
(void) fprintf( stderr, "\n" );
}
}
}
}
#endif
#ifdef AMOEBA_DEBUG
if( 0 ){ if( 0 ){
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
...@@ -694,7 +729,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo ...@@ -694,7 +729,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
fileId.push_back( call++ ); fileId.push_back( call++ );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "Temp1"); CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "ElectrostaticTemp");
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f ); //cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 ); reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 );
...@@ -706,6 +741,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo ...@@ -706,6 +741,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
cudaWriteVectorOfDoubleVectorsToFile( "CudaElectrostaticTorque", fileId, outputVector ); cudaWriteVectorOfDoubleVectorsToFile( "CudaElectrostaticTorque", fileId, outputVector );
delete temp; delete temp;
} }
#endif
if( addTorqueToForce ){ if( addTorqueToForce ){
kReduceTorque( amoebaGpu ); kReduceTorque( amoebaGpu );
......
...@@ -42,10 +42,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)( ...@@ -42,10 +42,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
#endif #endif
){ ){
#ifdef AMOEBA_DEBUG
float4 pullBack[20];
#endif
extern __shared__ ElectrostaticParticle sA[]; extern __shared__ ElectrostaticParticle sA[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID; unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
...@@ -210,6 +206,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)( ...@@ -210,6 +206,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
psA[tj].force[0] -= force[0]; psA[tj].force[0] -= force[0];
psA[tj].force[1] -= force[1]; psA[tj].force[1] -= force[1];
psA[tj].force[2] -= force[2]; psA[tj].force[2] -= force[2];
} }
tj = (tj + 1) & (GRID - 1); tj = (tj + 1) & (GRID - 1);
...@@ -243,6 +240,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)( ...@@ -243,6 +240,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
#endif #endif
zeroElectrostaticParticle( &(sA[threadIdx.x]) ); zeroElectrostaticParticle( &(sA[threadIdx.x]) );
zeroElectrostaticParticle( &localParticle ); zeroElectrostaticParticle( &localParticle );
tj = tgx;
for (unsigned int j = 0; j < GRID; j++){ for (unsigned int j = 0; j < GRID; j++){
unsigned int atomJ = y + tj; unsigned int atomJ = y + tj;
...@@ -264,6 +262,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)( ...@@ -264,6 +262,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
psA[tj].force[0] += force[0]; psA[tj].force[0] += force[0];
psA[tj].force[1] += force[1]; psA[tj].force[1] += force[1];
psA[tj].force[2] += force[2]; psA[tj].force[2] += force[2];
} }
tj = (tj + 1) & (GRID - 1); tj = (tj + 1) & (GRID - 1);
...@@ -298,5 +297,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)( ...@@ -298,5 +297,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
pos++; pos++;
} }
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += (conversionFactor*totalEnergy); cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += (conversionFactor*totalEnergy);
} }
...@@ -110,7 +110,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -110,7 +110,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){ if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%lu ixnCt=%lu workUnits=%lu\n", methodName, (void) fprintf( amoebaGpu->log, "cudaComputeAmoebaFixedEField numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%lu ixnCt=%lu workUnits=%lu\n",
gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp, gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp,
sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*threadsPerBlock, (*gpu->psInteractionCount)[0], gpu->sim.workUnits ); sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*threadsPerBlock, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
...@@ -121,23 +121,13 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -121,23 +121,13 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaFixedE_FieldN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>( kCalculateAmoebaFixedE_FieldN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, gpu->psWorkUnit->_pDevData,
amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevData,
debugArray->_pDevData, targetAtom );
#else
amoebaGpu->psWorkArray_3_2->_pDevData ); amoebaGpu->psWorkArray_3_2->_pDevData );
#endif
} else { } else {
kCalculateAmoebaFixedE_FieldN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>( kCalculateAmoebaFixedE_FieldN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, gpu->psWorkUnit->_pDevData,
amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevData,
debugArray->_pDevData, targetAtom );
#else
amoebaGpu->psWorkArray_3_2->_pDevData ); amoebaGpu->psWorkArray_3_2->_pDevData );
#endif
} }
LAUNCHERROR("kCalculateAmoebaFixedE_FieldN2Forces_kernel"); LAUNCHERROR("kCalculateAmoebaFixedE_FieldN2Forces_kernel");
...@@ -198,91 +188,6 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -198,91 +188,6 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
(void) fprintf( amoebaGpu->log, "EFields End\n" ); (void) fprintf( amoebaGpu->log, "EFields End\n" );
/*
(void) fprintf( amoebaGpu->log, "DebugQ\n" );
debugArray->Download();
if( 0 ){
int ii = targetAtom;
float sum[2][3] = { { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f, 0.0f } };
(void) fprintf( amoebaGpu->log,"\n" );
for( int jj = 0; jj < 1248; jj++ ){
int debugIndex = jj;
if( jj == ii )continue;
(void) fprintf( amoebaGpu->log,"\n\n%4d %4d rrs\n[%16.9e %16.9e %16.9e %16.9e]\n",
ii, jj,
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"Y1 %5d %16.9e %16.9e %16.9e\n", jj,
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
sum[0][0] += debugArray->_pSysData[debugIndex].x;
sum[0][1] += debugArray->_pSysData[debugIndex].y;
sum[0][2] += debugArray->_pSysData[debugIndex].z;
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"Y2 %5d %16.9e %16.9e %16.9e\n", jj,
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
sum[1][0] += debugArray->_pSysData[debugIndex].x;
sum[1][1] += debugArray->_pSysData[debugIndex].y;
sum[1][2] += debugArray->_pSysData[debugIndex].z;
}
(void) fprintf( amoebaGpu->log,"SumQ [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]\n",
sum[0][0], sum[0][1], sum[0][2],
sum[1][0], sum[1][1], sum[1][2] );
}
for( unsigned int ii = 0; ii < debugArray->_stride; ii++ ){
int print;
if( debugArray->_pSysData[ii].x != 0.0f || debugArray->_pSysData[ii].y != 0.0f ||
debugArray->_pSysData[ii].y != 0.0f || debugArray->_pSysData[ii].w != 0.0f ||
debugArray->_pSysData[ii].x != debugArray->_pSysData[ii].x ||
debugArray->_pSysData[ii].y != debugArray->_pSysData[ii].y ||
debugArray->_pSysData[ii].z != debugArray->_pSysData[ii].z ||
debugArray->_pSysData[ii].w != debugArray->_pSysData[ii].w ){
print = 0;
} else {
print = 0;
}
if( print ){
unsigned int atomI = ii/amoebaGpu->paddedNumberOfAtoms;
unsigned int atomJ = ii - atomI*amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log, "%5u [%5u %5u] ", ii, atomI, atomJ);
(void) fprintf( amoebaGpu->log, "%14.6e %14.6e %14.6e %14.6e\n",
debugArray->_pSysData[ii].x,
debugArray->_pSysData[ii].y,
debugArray->_pSysData[ii].z,
debugArray->_pSysData[ii].w );
}
}
*/
// write results to file // write results to file
......
...@@ -447,7 +447,7 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt ...@@ -447,7 +447,7 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt
fileId.push_back( call++ ); fileId.push_back( call++ );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "Temp1"); CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "Temp_MapTorqueAndAddToForce");
reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 ); reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 );
cudaLoadCudaFloatArray( gpu->natoms, 3, temp, outputVector, NULL, 1.0f/4.184f ); cudaLoadCudaFloatArray( gpu->natoms, 3, temp, outputVector, NULL, 1.0f/4.184f );
...@@ -479,7 +479,7 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt ...@@ -479,7 +479,7 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt
fileId.push_back( call++ ); fileId.push_back( call++ );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "Temp1"); CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "TempMapTorqueAndAddToForce2");
reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 ); reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 );
cudaLoadCudaFloatArray( gpu->natoms, 3, temp, outputVector, NULL, 1.0f/4.184f ); cudaLoadCudaFloatArray( gpu->natoms, 3, temp, outputVector, NULL, 1.0f/4.184f );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment