// Take a local copy of data[j] before building the real4.
// Copying it is a workaround for a bug in CUDA 5.0, so the store below must
// read from the copy `d` and not index data[j] directly.
real3 d = data[j];
// Pack the three components of d together with pos.w into one real4 and write
// it to element i + j*NUM_ATOMS of pmeBsplineTheta.
// Storing the charge here improves cache coherency in the charge spreading kernel.
pmeBsplineTheta[i + j*NUM_ATOMS] = make_real4(d.x, d.y, d.z, pos.w);