#ifndef OPENMM_CUDASTREAMIMPL_H_ #define OPENMM_CUDASTREAMIMPL_H_ /* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2008 Stanford University and the Authors. * * Authors: Peter Eastman * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . * * -------------------------------------------------------------------------- */ #include "openmm/StreamImpl.h" #include "kernels/gputypes.h" namespace OpenMM { /** * This is the implementation of streams in the CUDA Platform. */ template class CudaStreamImpl : public StreamImpl { public: CudaStreamImpl(std::string name, int size, Stream::DataType type, const Platform& platform, int substreams, _gpuContext* gpu); CudaStreamImpl(std::string name, int size, Stream::DataType type, const Platform& platform, CUDAStream* stream, int rowOffset, float* padding, _gpuContext* gpu); ~CudaStreamImpl(); void loadFromArray(const void* array); void saveToArray(void* array); void fillWithValue(void* value); const CUDAStream& getStream() const; CUDAStream& getStream(); private: void initType(); CUDAStream* stream; _gpuContext* gpu; bool ownStream; int width, rowOffset; float paddingValues[4]; Stream::DataType baseType; }; template CudaStreamImpl::CudaStreamImpl(std::string name, int size, Stream::DataType type, const Platform& platform, int substreams, _gpuContext* gpu) : StreamImpl(name, size, type, platform), stream(new CUDAStream(size, substreams)), ownStream(true), gpu(gpu) { initType(); rowOffset = width; }; template CudaStreamImpl::CudaStreamImpl(std::string name, int size, Stream::DataType type, const Platform& platform, CUDAStream* stream, int rowOffset, float* padding, _gpuContext* gpu) : StreamImpl(name, size, type, platform), stream(stream), rowOffset(rowOffset), ownStream(false), gpu(gpu) { initType(); for (int i = 0; i < 4; ++i) paddingValues[i] = padding[i]; }; template void CudaStreamImpl::initType() { switch (getDataType()) { case Stream::Float: case Stream::Float2: case Stream::Float3: case Stream::Float4: baseType = Stream::Float; break; case Stream::Double: case Stream::Double2: case Stream::Double3: case Stream::Double4: baseType = Stream::Double; break; case Stream::Integer: case Stream::Integer2: case Stream::Integer3: case Stream::Integer4: baseType = Stream::Integer; break; } switch (getDataType()) { case Stream::Float: case Stream::Double: case Stream::Integer: width = 1; break; case Stream::Float2: case Stream::Double2: case Stream::Integer2: width = 2; break; case Stream::Float3: case Stream::Double3: case Stream::Integer3: width = 3; break; case Stream::Float4: case Stream::Double4: case Stream::Integer4: width = 4; break; } } template CudaStreamImpl::~CudaStreamImpl() { if (ownStream) delete stream; } template void CudaStreamImpl::loadFromArray(const void* array) { float* data = reinterpret_cast(stream->_pSysData); int* order = gpu->psAtomIndex->_pSysData; if (baseType == Stream::Float) { float* arrayData = (float*) array; for (int i = 0; i < getSize(); ++i) for (int j = 0; j < width; ++j) data[i*rowOffset+j] = arrayData[order[i]*width+j]; } else if (baseType == Stream::Double) { double* arrayData = (double*) array; for (int i = 0; i < getSize(); ++i) for (int j = 0; j < width; ++j) data[i*rowOffset+j] = (float) arrayData[order[i]*width+j]; } else { int* arrayData = (int*) array; for (int i = 0; i < getSize(); ++i) for (int j = 0; j < width; ++j) data[i*rowOffset+j] = (float) arrayData[order[i]*width+j]; } for (int i = getSize(); i < (int) stream->_length; ++i) for (int j = 0; j < rowOffset; ++j) data[i*rowOffset+j] = paddingValues[j]; stream->Upload(); if (gpu && getName() == "particlePositions") { gpu->bRecalculateBornRadii = true; for (int i = 0; i < gpu->posCellOffsets.size(); i++) gpu->posCellOffsets[i] = make_int3(0, 0, 0); } } template void CudaStreamImpl::saveToArray(void* array) { stream->Download(); float* data = reinterpret_cast(stream->_pSysData); int* order = gpu->psAtomIndex->_pSysData; if (baseType == Stream::Float) { float* arrayData = (float*) array; for (int i = 0; i < getSize(); ++i) for (int j = 0; j < width; ++j) arrayData[order[i]*width+j] = data[i*rowOffset+j]; } else if (baseType == Stream::Double) { double* arrayData = (double*) array; for (int i = 0; i < getSize(); ++i) for (int j = 0; j < width; ++j) arrayData[order[i]*width+j] = data[i*rowOffset+j]; if (gpu && getName() == "particlePositions") { for (int i = 0; i < getSize(); i++) { int3 offset = gpu->posCellOffsets[i]; arrayData[order[i]*width] -= offset.x*gpu->sim.periodicBoxSizeX; arrayData[order[i]*width+1] -= offset.y*gpu->sim.periodicBoxSizeY; arrayData[order[i]*width+2] -= offset.z*gpu->sim.periodicBoxSizeZ; } } } else { int* arrayData = (int*) array; for (int i = 0; i < getSize(); ++i) for (int j = 0; j < width; ++j) arrayData[order[i]*width+j] = (int) data[i*rowOffset+j]; } } template void CudaStreamImpl::fillWithValue(void* value) { float* data = reinterpret_cast(stream->_pSysData); if (baseType == Stream::Float) { float valueData = *((float*) value); for (int i = 0; i < getSize(); ++i) for (int j = 0; j < width; ++j) data[i*rowOffset+j] = valueData; } else if (baseType == Stream::Double) { double valueData = *((double*) value); for (int i = 0; i < getSize(); ++i) for (int j = 0; j < width; ++j) data[i*rowOffset+j] = (float) valueData; } else { int valueData = *((int*) value); for (int i = 0; i < getSize(); ++i) for (int j = 0; j < width; ++j) data[i*rowOffset+j] = (float) valueData; } for (int i = getSize(); i < (int) stream->_length; ++i) for (int j = 0; j < rowOffset; ++j) data[i*rowOffset+j] = paddingValues[j]; stream->Upload(); if (gpu && getName() == "particlePositions") { gpu->bRecalculateBornRadii = true; for (int i = 0; i < gpu->posCellOffsets.size(); i++) gpu->posCellOffsets[i] = make_int3(0, 0, 0); } } template const CUDAStream& CudaStreamImpl::getStream() const { return stream; } template CUDAStream& CudaStreamImpl::getStream() { return stream; } } // namespace OpenMM #endif /*OPENMM_CUDASTREAMIMPL_H_*/