#ifndef OPENMM_CUDASTREAMIMPL_H_
#define OPENMM_CUDASTREAMIMPL_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/StreamImpl.h"
#include "kernels/gputypes.h"
namespace OpenMM {
/**
* This is the implementation of streams in the CUDA Platform.
*/
template
class CudaStreamImpl : public StreamImpl {
public:
CudaStreamImpl(std::string name, int size, Stream::DataType type, const Platform& platform, int substreams, _gpuContext* gpu);
CudaStreamImpl(std::string name, int size, Stream::DataType type, const Platform& platform, CUDAStream* stream, int rowOffset, float* padding, _gpuContext* gpu);
~CudaStreamImpl();
void loadFromArray(const void* array);
void saveToArray(void* array);
void fillWithValue(void* value);
const CUDAStream& getStream() const;
CUDAStream& getStream();
private:
void initType();
CUDAStream* stream;
_gpuContext* gpu;
bool ownStream;
int width, rowOffset;
float paddingValues[4];
Stream::DataType baseType;
};
template
CudaStreamImpl::CudaStreamImpl(std::string name, int size, Stream::DataType type, const Platform& platform, int substreams, _gpuContext* gpu) :
StreamImpl(name, size, type, platform), stream(new CUDAStream(size, substreams)), ownStream(true), gpu(gpu) {
initType();
rowOffset = width;
};
template
CudaStreamImpl::CudaStreamImpl(std::string name, int size, Stream::DataType type, const Platform& platform, CUDAStream* stream, int rowOffset, float* padding, _gpuContext* gpu) :
StreamImpl(name, size, type, platform), stream(stream), rowOffset(rowOffset), ownStream(false), gpu(gpu) {
initType();
for (int i = 0; i < 4; ++i)
paddingValues[i] = padding[i];
};
template
void CudaStreamImpl::initType() {
switch (getDataType()) {
case Stream::Float:
case Stream::Float2:
case Stream::Float3:
case Stream::Float4:
baseType = Stream::Float;
break;
case Stream::Double:
case Stream::Double2:
case Stream::Double3:
case Stream::Double4:
baseType = Stream::Double;
break;
case Stream::Integer:
case Stream::Integer2:
case Stream::Integer3:
case Stream::Integer4:
baseType = Stream::Integer;
break;
}
switch (getDataType()) {
case Stream::Float:
case Stream::Double:
case Stream::Integer:
width = 1;
break;
case Stream::Float2:
case Stream::Double2:
case Stream::Integer2:
width = 2;
break;
case Stream::Float3:
case Stream::Double3:
case Stream::Integer3:
width = 3;
break;
case Stream::Float4:
case Stream::Double4:
case Stream::Integer4:
width = 4;
break;
}
}
template
CudaStreamImpl::~CudaStreamImpl() {
if (ownStream)
delete stream;
}
template
void CudaStreamImpl::loadFromArray(const void* array) {
float* data = reinterpret_cast(stream->_pSysData);
int* order = gpu->psAtomIndex->_pSysData;
if (baseType == Stream::Float) {
float* arrayData = (float*) array;
for (int i = 0; i < getSize(); ++i)
for (int j = 0; j < width; ++j)
data[i*rowOffset+j] = arrayData[order[i]*width+j];
}
else if (baseType == Stream::Double) {
double* arrayData = (double*) array;
for (int i = 0; i < getSize(); ++i)
for (int j = 0; j < width; ++j)
data[i*rowOffset+j] = (float) arrayData[order[i]*width+j];
}
else {
int* arrayData = (int*) array;
for (int i = 0; i < getSize(); ++i)
for (int j = 0; j < width; ++j)
data[i*rowOffset+j] = (float) arrayData[order[i]*width+j];
}
for (int i = getSize(); i < (int) stream->_length; ++i)
for (int j = 0; j < rowOffset; ++j)
data[i*rowOffset+j] = paddingValues[j];
stream->Upload();
if (gpu && getName() == "particlePositions") {
gpu->bRecalculateBornRadii = true;
for (int i = 0; i < gpu->posCellOffsets.size(); i++)
gpu->posCellOffsets[i] = make_int3(0, 0, 0);
}
}
template
void CudaStreamImpl::saveToArray(void* array) {
stream->Download();
float* data = reinterpret_cast(stream->_pSysData);
int* order = gpu->psAtomIndex->_pSysData;
if (baseType == Stream::Float) {
float* arrayData = (float*) array;
for (int i = 0; i < getSize(); ++i)
for (int j = 0; j < width; ++j)
arrayData[order[i]*width+j] = data[i*rowOffset+j];
}
else if (baseType == Stream::Double) {
double* arrayData = (double*) array;
for (int i = 0; i < getSize(); ++i)
for (int j = 0; j < width; ++j)
arrayData[order[i]*width+j] = data[i*rowOffset+j];
if (gpu && getName() == "particlePositions") {
for (int i = 0; i < getSize(); i++) {
int3 offset = gpu->posCellOffsets[i];
arrayData[order[i]*width] -= offset.x*gpu->sim.periodicBoxSizeX;
arrayData[order[i]*width+1] -= offset.y*gpu->sim.periodicBoxSizeY;
arrayData[order[i]*width+2] -= offset.z*gpu->sim.periodicBoxSizeZ;
}
}
}
else {
int* arrayData = (int*) array;
for (int i = 0; i < getSize(); ++i)
for (int j = 0; j < width; ++j)
arrayData[order[i]*width+j] = (int) data[i*rowOffset+j];
}
}
template
void CudaStreamImpl::fillWithValue(void* value) {
float* data = reinterpret_cast(stream->_pSysData);
if (baseType == Stream::Float) {
float valueData = *((float*) value);
for (int i = 0; i < getSize(); ++i)
for (int j = 0; j < width; ++j)
data[i*rowOffset+j] = valueData;
}
else if (baseType == Stream::Double) {
double valueData = *((double*) value);
for (int i = 0; i < getSize(); ++i)
for (int j = 0; j < width; ++j)
data[i*rowOffset+j] = (float) valueData;
}
else {
int valueData = *((int*) value);
for (int i = 0; i < getSize(); ++i)
for (int j = 0; j < width; ++j)
data[i*rowOffset+j] = (float) valueData;
}
for (int i = getSize(); i < (int) stream->_length; ++i)
for (int j = 0; j < rowOffset; ++j)
data[i*rowOffset+j] = paddingValues[j];
stream->Upload();
if (gpu && getName() == "particlePositions") {
gpu->bRecalculateBornRadii = true;
for (int i = 0; i < gpu->posCellOffsets.size(); i++)
gpu->posCellOffsets[i] = make_int3(0, 0, 0);
}
}
template
const CUDAStream& CudaStreamImpl::getStream() const {
return stream;
}
template
CUDAStream& CudaStreamImpl::getStream() {
return stream;
}
} // namespace OpenMM
#endif /*OPENMM_CUDASTREAMIMPL_H_*/