"driver/driver.cpp" did not exist on "9657baec325227d0d64424bffb394afbd6d37a60"
blockwise_winograd_transform.cuh 1.08 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#pragma once
#include "constant_tensor_descriptor.cuh"

// Placeholder for the block-level input transform.
//
// Current behavior (all that the body does):
//   - stores the value 1 into the first element of p_in_transform;
//   - never reads p_in;
//   - uses none of the tile / per-block / BlockSize template parameters.
//
// There is no thread-index guard, so every thread that calls this function
// performs the same store to the same address.
//
// NOTE(review): judging by the name, this is presumably where the Winograd
// input-tile transform is meant to be implemented -- confirm with the author;
// nothing of that computation exists here yet.
template <class TFloat,
          unsigned InTileSizeH,
          unsigned InTileSizeW,
          unsigned S,
          unsigned R,
          unsigned OutTileSizeH,
          unsigned OutTileSizeW,
          unsigned NPerBlock,
          unsigned CPerBlock,
          unsigned YPerBlock,
          unsigned XPerBlock,
          unsigned BlockSize>
__device__ void blockwise_winograd_transform_input(TFloat* const __restrict__ p_in,
                                                   TFloat* __restrict__ p_in_transform)
{
    // The source buffer is intentionally untouched by this stub.
    (void)p_in;

    // Stub output: only element 0 of the destination is written.
    *p_in_transform = 1;
}

// Placeholder for the block-level weight transform.
//
// Current behavior (all that the body does):
//   - stores the value 1 into the first element of p_wei_transform;
//   - never reads p_wei;
//   - uses none of the tile / per-block / BlockSize template parameters.
//
// There is no thread-index guard, so every thread that calls this function
// performs the same store to the same address.
//
// NOTE(review): judging by the name, this is presumably where the Winograd
// filter transform is meant to be implemented -- confirm with the author;
// nothing of that computation exists here yet.
template <class TFloat,
          unsigned InTileSizeH,
          unsigned InTileSizeW,
          unsigned S,
          unsigned R,
          unsigned OutTileSizeH,
          unsigned OutTileSizeW,
          unsigned KPerBlock,
          unsigned CPerBlock,
          unsigned BlockSize>
__device__ void blockwise_winograd_transform_weight(TFloat* const __restrict__ p_wei,
                                                    TFloat* __restrict__ p_wei_transform)
{
    // The source buffer is intentionally untouched by this stub.
    (void)p_wei;

    // Stub output: only element 0 of the destination is written.
    *p_wei_transform = 1;
}