// device_tensor.cuh
#pragma once
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>
#include <stdexcept>

#include "helper_cuda.h"
#include "tensor.hpp"

/// Fixed-rank tensor descriptor whose accessors are callable from host and device.
///
/// Stores the per-dimension lengths and strides of a tensor in two plain
/// `unsigned[NDim]` arrays; the object holds no pointers.
///
/// \tparam NDim number of dimensions stored in the descriptor.
template <unsigned NDim>
struct DeviceTensorDescriptor
{
    /// Default construction performs no initialization: `mpLengths` and
    /// `mpStrides` hold indeterminate values until they are assigned.
    __host__ __device__ DeviceTensorDescriptor() = default;

    /// Builds the descriptor from a host-side `TensorDescriptor` (host only).
    ///
    /// \param host_desc descriptor whose lengths and strides are copied; it
    ///        must describe exactly `NDim` dimensions.
    /// \throws std::invalid_argument if `host_desc` does not provide exactly
    ///         `NDim` lengths and `NDim` strides.
    __host__ DeviceTensorDescriptor(const TensorDescriptor& host_desc)
    {
        // Bind each getter result once so that begin()/end() always refer to
        // the same object, whether the getter returns by value or by reference.
        const auto& host_lengths = host_desc.GetLengths();
        const auto& host_strides = host_desc.GetStrides();

        assert(NDim == host_desc.GetDimension());

        // assert() vanishes under NDEBUG, so check the sizes explicitly before
        // writing into the fixed-size arrays; a mismatch would overrun them.
        const std::size_t num_lengths =
            static_cast<std::size_t>(std::distance(host_lengths.begin(), host_lengths.end()));
        const std::size_t num_strides =
            static_cast<std::size_t>(std::distance(host_strides.begin(), host_strides.end()));

        if(num_lengths != NDim || num_strides != NDim)
        {
            throw std::invalid_argument(
                "DeviceTensorDescriptor: host descriptor rank does not match NDim");
        }

        std::copy(host_lengths.begin(), host_lengths.end(), mpLengths);
        std::copy(host_strides.begin(), host_strides.end(), mpStrides);
    }

    /// Returns the length of dimension `i`. No bounds check: `i` must be < NDim.
    __host__ __device__ unsigned GetLength(unsigned i) const { return mpLengths[i]; }

    /// Returns the stride of dimension `i`. No bounds check: `i` must be < NDim.
    __host__ __device__ unsigned GetStride(unsigned i) const { return mpStrides[i]; }

    /// Computes the linear offset of a four-index coordinate:
    /// `n*stride[0] + c*stride[1] + h*stride[2] + w*stride[3]`.
    ///
    /// Only the first four strides are used, so this member is available only
    /// when `NDim >= 4`. The arithmetic is done in `unsigned` and wraps on
    /// overflow.
    __host__ __device__ unsigned Get1dIndex(unsigned n, unsigned c, unsigned h, unsigned w) const
    {
        // Checked only when this member is instantiated, so descriptors of
        // lower rank that never call Get1dIndex still compile.
        static_assert(NDim >= 4, "Get1dIndex reads mpStrides[0..3] and requires NDim >= 4");

        return n * mpStrides[0] + c * mpStrides[1] + h * mpStrides[2] + w * mpStrides[3];
    }

    unsigned mpLengths[NDim]; // length of each dimension
    unsigned mpStrides[NDim]; // stride of each dimension
};