Remove file accidentally included in previous commit

75104c41 · Andy Simmonett · 98f73120 · 98f73120
Unverified Commit 75104c41 authored Oct 25, 2019 by Andy Simmonett
Hide whitespace changes
Inline Side-by-side

Showing with 0 additions and 128 deletions

platforms/opencl/src/kernels/diffcucl.txt platforms/opencl/src/kernels/diffcucl.txt +0 -128

No files found.
--- a/platforms/opencl/src/kernels/diffcucl.txt
+++ b/platforms/opencl/src/kernels/diffcucl.txt
-2c2
-<  * Perform the first step of Verlet integration.
---
->  * Perform the first step of verlet integration.
-5,11c5,10
-< extern "C" __global__ void integrateVerletPart1(int numAtoms, int paddedNumAtoms, const mixed2* __restrict__ dt, const real4* __restrict__ posq,
-<         const real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta) {
-<     const mixed2 stepSize = dt[0];
-<     const mixed dtPos = stepSize.y;
-<     const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
-<     const mixed scale = dtVel/(mixed) 0x100000000;
-<     for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
---
-> __kernel void integrateVerletPart1(int numAtoms, __global const mixed2* restrict dt, __global const real4* restrict posq, __global const real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta) {
->     mixed2 stepSize = dt[0];
->     mixed dtPos = stepSize.y;
->     mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
->     int index = get_global_id(0);
->     while (index < numAtoms) {
-17c16
-<             mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
---
->             mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
-21,26c20,23
-<             velocity.x += scale*force[index]*velocity.w;
-<             velocity.y += scale*force[index+paddedNumAtoms]*velocity.w;
-<             velocity.z += scale*force[index+paddedNumAtoms*2]*velocity.w;
-<             pos.x = velocity.x*dtPos;
-<             pos.y = velocity.y*dtPos;
-<             pos.z = velocity.z*dtPos;
---
->             velocity.x += force[index].x*dtVel*velocity.w;
->             velocity.y += force[index].y*dtVel*velocity.w;
->             velocity.z += force[index].z*dtVel*velocity.w;
->             pos.xyz = velocity.xyz*dtPos;
-29a27
->         index += get_global_size(0);
-34c32
-<  * Perform the second step of Verlet integration.
---
->  * Perform the second step of verlet integration.
-37,38c35
-< extern "C" __global__ void integrateVerletPart2(int numAtoms, mixed2* __restrict__ dt, real4* __restrict__ posq,
-<         real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, const mixed4* __restrict__ posDelta) {
---
-> __kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, __global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const mixed4* restrict posDelta) {
-40c37
-< #if __CUDA_ARCH__ >= 130
---
-> #ifdef SUPPORTS_DOUBLE_PRECISION
-46,47c43
-<     int index = blockIdx.x*blockDim.x+threadIdx.x;
-<     if (index == 0)
---
->     if (get_global_id(0) == 0)
-49c45,47
-<     for (; index < numAtoms; index += blockDim.x*gridDim.x) {
---
->     barrier(CLK_LOCAL_MEM_FENCE);
->     int index = get_global_id(0);
->     while (index < numAtoms) {
-55c53
-<             mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
---
->             mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
-60,64c58,60
-<             pos.x += delta.x;
-<             pos.y += delta.y;
-<             pos.z += delta.z;
-< #if __CUDA_ARCH__ >= 130
-<             velocity = make_mixed4((mixed) (delta.x*oneOverDt), (mixed) (delta.y*oneOverDt), (mixed) (delta.z*oneOverDt), velocity.w);
---
->             pos.xyz += delta.xyz;
-> #ifdef SUPPORTS_DOUBLE_PRECISION
->             velocity.xyz = convert_mixed4(convert_double4(delta)*oneOverDt).xyz;
-66c62
-<             velocity = make_mixed4((mixed) (delta.x*oneOverDt+delta.x*correction), (mixed) (delta.y*oneOverDt+delta.y*correction), (mixed) (delta.z*oneOverDt+delta.z*correction), velocity.w);
---
->             velocity.xyz = delta.xyz*oneOverDt + delta.xyz*correction;
-69,70c65,66
-<             posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
-<             posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
---
->             posq[index] = convert_real4(pos);
->             posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
-75a72
->         index += get_global_size(0);
-83c80
-< extern "C" __global__ void selectVerletStepSize(int numAtoms, int paddedNumAtoms, mixed maxStepSize, mixed errorTol, mixed2* __restrict__ dt, const mixed4* __restrict__ velm, const long long* __restrict__ force) {
---
-> __kernel void selectVerletStepSize(int numAtoms, mixed maxStepSize, mixed errorTol, __global mixed2* restrict dt, __global const mixed4* restrict velm, __global const real4* restrict force, __local mixed* restrict error) {
-86,90c83,86
-<     extern __shared__ mixed error[];
-<     mixed err = 0.0f;
-<     const mixed scale = RECIP((mixed) 0x100000000);
-<     for (int index = threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
-<         mixed3 f = make_mixed3(scale*force[index], scale*force[index+paddedNumAtoms], scale*force[index+paddedNumAtoms*2]);
---
->     mixed err = 0;
->     int index = get_local_id(0);
->     while (index < numAtoms) {
->         real4 f = force[index];
-92a89
->         index += get_global_size(0);
-94,95c91,92
-<     error[threadIdx.x] = err;
-<     __syncthreads();
---
->     error[get_local_id(0)] = err;
->     barrier(CLK_LOCAL_MEM_FENCE);
-99,102c96,99
-<     for (unsigned int offset = 1; offset < blockDim.x; offset *= 2) {
-<         if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0)
-<             error[threadIdx.x] += error[threadIdx.x+offset];
-<         __syncthreads();
---
->     for (unsigned int offset = 1; offset < get_local_size(0); offset *= 2) {
->         if (get_local_id(0)+offset < get_local_size(0) && (get_local_id(0)&(2*offset-1)) == 0)
->             error[get_local_id(0)] += error[get_local_id(0)+offset];
->         barrier(CLK_LOCAL_MEM_FENCE);
-104,106c101,103
-<     if (threadIdx.x == 0) {
-<         mixed totalError = SQRT(error[0]/(numAtoms*3));
-<         mixed newStepSize = SQRT(errorTol/totalError);
---
->     if (get_local_id(0) == 0) {
->         mixed totalError = sqrt(error[0]/(numAtoms*3));
->         mixed newStepSize = sqrt(errorTol/totalError);