Commit 05e04665 authored by Chao Liu
Browse files

remove .hip extension

parent a68b16a5
#pragma once
#include "ConstantTensorDescriptor.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "threadwise_tensor_slice_op.hpp"
template <index_t BlockSize, class Float, class DstDesc, class F>
__device__ void
......
#pragma once
#include "threadwise_gemm.hip.hpp"
#include "threadwise_gemm.hpp"
template <index_t BlockSize,
class BlockMatrixA,
......
#pragma once
#include "common.hip.hpp"
#include "threadwise_gemm.hip.hpp"
#include "common.hpp"
#include "threadwise_gemm.hpp"
// if the following numbers are powers of 2, index calculation will be greatly reduced:
// MPerThreadSubC, NPerThreadSubC, MLevel0Cluster, NLevel0Cluster, MLevel1Cluster, NLevel1Cluster
......
#pragma once
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_tensor_slice_op.hpp"
// slice a (normal or merged) tensor, and copy it into another (normal or merged) tensor
// memory layout (ordering of dimensions) can be different between src and dst
......
#pragma once
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_tensor_slice_op.hpp"
template <index_t BlockSize,
class Float,
......
#pragma once
#include "base.hip.hpp"
#include "vector_type.hip.hpp"
#include "integral_constant.hip.hpp"
#include "Sequence.hip.hpp"
#include "Array.hip.hpp"
#include "functional.hip.hpp"
#include "functional2.hip.hpp"
#include "functional3.hip.hpp"
#if USE_AMD_INLINE_ASM
#include "amd_inline_asm.hip.hpp"
#endif
#pragma once
#include "base.hpp"
#include "vector_type.hpp"
#include "integral_constant.hpp"
#include "Sequence.hpp"
#include "Array.hpp"
#include "functional.hpp"
#include "functional2.hpp"
#include "functional3.hpp"
#if USE_AMD_INLINE_ASM
#include "amd_inline_asm.hpp"
#endif
#pragma once
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantTensorDescriptor.hpp"
// this is ugly, only for 4d
template <class InDesc, class WeiDesc>
......
#pragma once
#include "integral_constant.hip.hpp"
#include "Sequence.hip.hpp"
#include "integral_constant.hpp"
#include "Sequence.hpp"
struct forwarder
{
......
#pragma once
#include "functional.hip.hpp"
#include "Sequence.hip.hpp"
#include "functional.hpp"
#include "Sequence.hpp"
template <class>
struct static_for_impl;
......
#pragma once
#include "functional.hip.hpp"
#include "functional2.hip.hpp"
#include "Sequence.hip.hpp"
#include "Array.hip.hpp"
#include "functional.hpp"
#include "functional2.hpp"
#include "Sequence.hpp"
#include "Array.hpp"
// RemainLengths: Sequence<...>
template <class RemainLengths>
......
#pragma once
#include "common.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "blockwise_2d_tensor_op.hip.hpp"
#include "blockwise_4d_tensor_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_direct_convolution.hip.hpp"
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_op.hpp"
#include "threadwise_direct_convolution.hpp"
template <index_t GridSize,
index_t BlockSize,
......
#pragma once
#include "common.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "blockwise_4d_tensor_op.hip.hpp"
#include "blockwise_2d_tensor_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_4d_tensor_op.hip.hpp"
#include "blockwise_batched_gemm.hip.hpp"
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "threadwise_tensor_slice_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
template <index_t GridSize,
index_t BlockSize,
......
#pragma once
#include "common.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "blockwise_2d_tensor_op.hip.hpp"
#include "blockwise_3d_tensor_op.hip.hpp"
#include "blockwise_4d_tensor_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_4d_tensor_op.hip.hpp"
#include "blockwise_batched_gemm.hip.hpp"
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_3d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
template <index_t GridSize,
index_t BlockSize,
......
#pragma once
#include "common.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "blockwise_2d_tensor_op.hip.hpp"
#include "blockwise_3d_tensor_op.hip.hpp"
#include "blockwise_tensor_slice_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_4d_tensor_op.hip.hpp"
#include "blockwise_batched_gemm.hip.hpp"
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_3d_tensor_op.hpp"
#include "blockwise_tensor_slice_op.hpp"
#include "threadwise_tensor_slice_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
template <index_t GridSize,
index_t BlockSize,
......
#pragma once
#include "common.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "blockwise_2d_tensor_op.hip.hpp"
#include "blockwise_4d_tensor_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_4d_tensor_op.hip.hpp"
#include "blockwise_batched_gemm.hip.hpp"
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
template <index_t GridSize,
index_t BlockSize,
......
#pragma once
#include "common.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "blockwise_2d_tensor_op.hip.hpp"
#include "blockwise_4d_tensor_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_4d_tensor_op.hip.hpp"
#include "blockwise_batched_gemm.hip.hpp"
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_4d_tensor_op.hpp"
#include "threadwise_tensor_slice_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
template <index_t GridSize,
index_t BlockSize,
......
#pragma once
#include "common.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "blockwise_2d_tensor_op.hip.hpp"
#include "blockwise_tensor_slice_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_4d_tensor_op.hip.hpp"
#include "blockwise_batched_gemm.hip.hpp"
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_op.hpp"
#include "threadwise_tensor_slice_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
template <index_t GridSize,
index_t BlockSize,
......
#pragma once
#include "common.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "blockwise_2d_tensor_op.hip.hpp"
#include "blockwise_tensor_slice_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_4d_tensor_op.hip.hpp"
#include "blockwise_batched_gemm.hip.hpp"
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_op.hpp"
#include "threadwise_tensor_slice_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
template <index_t GridSize,
index_t BlockSize,
......
#pragma once
#include "common.hip.hpp"
#include "ConstantTensorDescriptor.hip.hpp"
#include "ConstantMatrixDescriptor.hip.hpp"
#include "blockwise_2d_tensor_op.hip.hpp"
#include "blockwise_tensor_slice_op.hip.hpp"
#include "threadwise_tensor_slice_op.hip.hpp"
#include "threadwise_4d_tensor_op.hip.hpp"
#include "blockwise_batched_gemm.hip.hpp"
#include "common.hpp"
#include "ConstantTensorDescriptor.hpp"
#include "ConstantMatrixDescriptor.hpp"
#include "blockwise_2d_tensor_op.hpp"
#include "blockwise_tensor_slice_op.hpp"
#include "threadwise_tensor_slice_op.hpp"
#include "threadwise_4d_tensor_op.hpp"
#include "blockwise_batched_gemm.hpp"
template <index_t GridSize,
index_t BlockSize,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment