// device_tensor.cuh
// Last recorded commit by: Chao Liu
#pragma once
#include "helper_cuda.h"
#include "tensor.hpp"

// Device-resident copy of a host TensorDescriptor's shape information.
//
// On construction the per-dimension lengths and strides of the host
// descriptor are copied into two freshly cudaMalloc'ed arrays of mDim
// elements each; the destructor releases them with cudaFree.
//
// NOTE(review): this type owns raw device pointers but is still implicitly
// copyable. Copying an instance makes two objects free the same buffers.
// Copy operations are left as they were to avoid changing the interface for
// callers not visible here -- confirm whether they can be deleted.
struct DeviceTensorDescriptor
{
    DeviceTensorDescriptor() = delete;

    // Builds the device-side descriptor from `host_desc`.
    //
    // host_desc: host descriptor providing GetDataType(), GetDimension(),
    //            GetLengths() and GetStrides(); the latter two must expose
    //            contiguous storage via .data() holding at least
    //            GetDimension() elements.
    //
    // Every CUDA call is wrapped in checkCudaErrors (from helper_cuda.h).
    __host__ DeviceTensorDescriptor(const TensorDescriptor& host_desc)
        : mDataType(host_desc.GetDataType()),
          mDim(host_desc.GetDimension()),
          mpLengths(nullptr),
          mpStrides(nullptr)
    {
        // The device arrays are declared as unsigned long, so the raw byte
        // copy below is only valid if the host elements have the same size.
        static_assert(sizeof(*host_desc.GetLengths().data()) == sizeof(unsigned long) &&
                          sizeof(*host_desc.GetStrides().data()) == sizeof(unsigned long),
                      "host length/stride element size must match unsigned long");

        // Size of one lengths/strides array. This depends on the index type,
        // not on the tensor's payload data type (mDataType).
        const std::size_t bytes = sizeof(unsigned long) * mDim;

        // Nothing to allocate or copy for a zero-dimensional descriptor; the
        // pointers stay null and cudaFree(nullptr) in the destructor is a no-op.
        if(bytes == 0)
        {
            return;
        }

        checkCudaErrors(cudaMalloc(&mpLengths, bytes));
        checkCudaErrors(cudaMalloc(&mpStrides, bytes));

        // cudaMemcpy takes (dst, src, count, kind): device buffer first, host
        // data second for a host-to-device transfer.
        checkCudaErrors(cudaMemcpy(
            mpLengths, host_desc.GetLengths().data(), bytes, cudaMemcpyHostToDevice));
        checkCudaErrors(cudaMemcpy(
            mpStrides, host_desc.GetStrides().data(), bytes, cudaMemcpyHostToDevice));
    }

    // Releases the two device arrays allocated by the constructor.
    __host__ ~DeviceTensorDescriptor()
    {
        checkCudaErrors(cudaFree(mpLengths));
        checkCudaErrors(cudaFree(mpStrides));
    }

    DataType_t mDataType;     // value of host_desc.GetDataType() at construction
    unsigned long mDim;       // value of host_desc.GetDimension() at construction
    unsigned long* mpLengths; // device array of mDim lengths (null when mDim == 0)
    unsigned long* mpStrides; // device array of mDim strides (null when mDim == 0)
};