Commit 54e3ce2e authored by Paul's avatar Paul
Browse files

Format

parent 05605886
...@@ -27,105 +27,104 @@ __device__ vec<int32_t, 4> make_wave_buffer_resource(T* p_wave, index_int bytes) ...@@ -27,105 +27,104 @@ __device__ vec<int32_t, 4> make_wave_buffer_resource(T* p_wave, index_int bytes)
return result.content; return result.content;
} }
template<class T> template <class T>
struct raw_buffer_load; struct raw_buffer_load;
#define MIGRAPHX_BUFFER_ADDR_VISIT_TYPES(m) \ #define MIGRAPHX_BUFFER_ADDR_VISIT_TYPES(m) m(i8, int8_t) m(i16, int16_t) m(f16, half) m(f32, float)
m(i8, int8_t) \
m(i16, int16_t) \
m(f16, half) \
m(f32, float)
#define MIGRAPHX_RAW_BUFFER_LOAD(llvmtype, ...) \ #define MIGRAPHX_RAW_BUFFER_LOAD(llvmtype, ...) \
__device__ __VA_ARGS__ \ __device__ __VA_ARGS__ llvm_amdgcn_raw_buffer_load_##llvmtype( \
llvm_amdgcn_raw_buffer_load_ ## llvmtype(vec<int32_t, 4> srsrc, \ vec<int32_t, 4> srsrc, \
int32_t voffset, \ int32_t voffset, \
int32_t soffset, \ int32_t soffset, \
int32_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load." #llvmtype); \ int32_t glc_slc) __asm("llvm.amdgcn.raw.buffer.load." #llvmtype); \
template<> \ template <> \
struct raw_buffer_load<__VA_ARGS__> { \ struct raw_buffer_load<__VA_ARGS__> \
{ \
static __device__ __VA_ARGS__ apply(vec<int32_t, 4> srsrc, \ static __device__ __VA_ARGS__ apply(vec<int32_t, 4> srsrc, \
int32_t voffset, \ int32_t voffset, \
int32_t soffset, \ int32_t soffset, \
int32_t glc_slc) { \ int32_t glc_slc) \
return llvm_amdgcn_raw_buffer_load_ ## llvmtype(srsrc, voffset, soffset, glc_slc); \ { \
return llvm_amdgcn_raw_buffer_load_##llvmtype(srsrc, voffset, soffset, glc_slc); \
} \ } \
}; };
#define MIGRAPHX_RAW_BUFFER_LOAD_VEC(llvmtype, cpptype) \ #define MIGRAPHX_RAW_BUFFER_LOAD_VEC(llvmtype, cpptype) \
MIGRAPHX_RAW_BUFFER_LOAD(llvmtype, cpptype) \ MIGRAPHX_RAW_BUFFER_LOAD(llvmtype, cpptype) \
MIGRAPHX_RAW_BUFFER_LOAD(v2 ## llvmtype, vec<cpptype, 2>) \ MIGRAPHX_RAW_BUFFER_LOAD(v2##llvmtype, vec<cpptype, 2>) \
MIGRAPHX_RAW_BUFFER_LOAD(v4 ## llvmtype, vec<cpptype, 4>) MIGRAPHX_RAW_BUFFER_LOAD(v4##llvmtype, vec<cpptype, 4>)
MIGRAPHX_BUFFER_ADDR_VISIT_TYPES(MIGRAPHX_RAW_BUFFER_LOAD_VEC) MIGRAPHX_BUFFER_ADDR_VISIT_TYPES(MIGRAPHX_RAW_BUFFER_LOAD_VEC)
#define MIGRAPHX_RAW_BUFFER_STORE(llvmtype, ...) \ #define MIGRAPHX_RAW_BUFFER_STORE(llvmtype, ...) \
__device__ void \ __device__ void llvm_amdgcn_raw_buffer_store_##llvmtype( \
llvm_amdgcn_raw_buffer_store_ ## llvmtype(__VA_ARGS__ vdata, vec<int32_t, 4> srsrc, \ __VA_ARGS__ vdata, \
vec<int32_t, 4> srsrc, \
int32_t voffset, \ int32_t voffset, \
int32_t soffset, \ int32_t soffset, \
int32_t glc_slc) __asm("llvm.amdgcn.raw.buffer.store." #llvmtype); \ int32_t glc_slc) __asm("llvm.amdgcn.raw.buffer.store." #llvmtype); \
__device__ void raw_buffer_store(__VA_ARGS__ vdata, vec<int32_t, 4> srsrc, \ __device__ void raw_buffer_store(__VA_ARGS__ vdata, \
vec<int32_t, 4> srsrc, \
int32_t voffset, \ int32_t voffset, \
int32_t soffset, \ int32_t soffset, \
int32_t glc_slc) { \ int32_t glc_slc) \
llvm_amdgcn_raw_buffer_store_ ## llvmtype(vdata, srsrc, voffset, soffset, glc_slc); \ { \
llvm_amdgcn_raw_buffer_store_##llvmtype(vdata, srsrc, voffset, soffset, glc_slc); \
} }
#define MIGRAPHX_RAW_BUFFER_STORE_VEC(llvmtype, cpptype) \ #define MIGRAPHX_RAW_BUFFER_STORE_VEC(llvmtype, cpptype) \
MIGRAPHX_RAW_BUFFER_STORE(llvmtype, cpptype) \ MIGRAPHX_RAW_BUFFER_STORE(llvmtype, cpptype) \
MIGRAPHX_RAW_BUFFER_STORE(v2 ## llvmtype, vec<cpptype, 2>) \ MIGRAPHX_RAW_BUFFER_STORE(v2##llvmtype, vec<cpptype, 2>) \
MIGRAPHX_RAW_BUFFER_STORE(v4 ## llvmtype, vec<cpptype, 4>) MIGRAPHX_RAW_BUFFER_STORE(v4##llvmtype, vec<cpptype, 4>)
MIGRAPHX_BUFFER_ADDR_VISIT_TYPES(MIGRAPHX_RAW_BUFFER_STORE_VEC) MIGRAPHX_BUFFER_ADDR_VISIT_TYPES(MIGRAPHX_RAW_BUFFER_STORE_VEC)
template<class T, index_int N> template <class T, index_int N>
struct raw_buffer_load<vec<T, N>> struct raw_buffer_load<vec<T, N>>
{ {
static __device__ vec<T, N> apply(vec<int32_t, 4> srsrc, static __device__ vec<T, N>
int32_t voffset, apply(vec<int32_t, 4> srsrc, int32_t voffset, int32_t soffset, int32_t glc_slc)
int32_t soffset,
int32_t glc_slc)
{ {
static_assert(N % 2 == 0, "Invalid vector size"); static_assert(N % 2 == 0, "Invalid vector size");
union type union type
{ {
vec<T, N> data; vec<T, N> data;
vec<T, N/2> reg[2]; vec<T, N / 2> reg[2];
}; };
type result; type result;
auto offset = sizeof(T) * (N/2); auto offset = sizeof(T) * (N / 2);
result.reg[0] = raw_buffer_load<vec<T, N/2>>::apply(srsrc, voffset+offset, soffset, glc_slc); result.reg[0] =
result.reg[1] = raw_buffer_load<vec<T, N/2>>::apply(srsrc, voffset, soffset, glc_slc); raw_buffer_load<vec<T, N / 2>>::apply(srsrc, voffset + offset, soffset, glc_slc);
result.reg[1] = raw_buffer_load<vec<T, N / 2>>::apply(srsrc, voffset, soffset, glc_slc);
return result.data; return result.data;
} }
}; };
template<class T, index_int N> template <class T, index_int N>
__device__ void raw_buffer_store(vec<T, N> vdata, vec<int32_t, 4> srsrc, __device__ void raw_buffer_store(
int32_t voffset, vec<T, N> vdata, vec<int32_t, 4> srsrc, int32_t voffset, int32_t soffset, int32_t glc_slc)
int32_t soffset, {
int32_t glc_slc) {
union type union type
{ {
vec<T, N> data; vec<T, N> data;
vec<T, N/2> reg[2]; vec<T, N / 2> reg[2];
}; };
type x; type x;
x.data = vdata; x.data = vdata;
auto offset = sizeof(T) * (N/2); auto offset = sizeof(T) * (N / 2);
raw_buffer_store(x.reg[0], srsrc, voffset, soffset, glc_slc); raw_buffer_store(x.reg[0], srsrc, voffset, soffset, glc_slc);
raw_buffer_store(x.reg[1], srsrc, voffset+offset, soffset, glc_slc); raw_buffer_store(x.reg[1], srsrc, voffset + offset, soffset, glc_slc);
} }
template<class T> template <class T>
__device__ T buffer_load(const T* p, index_int offset, index_int size, address_space::global) __device__ T buffer_load(const T* p, index_int offset, index_int size, address_space::global)
{ {
auto resource = make_wave_buffer_resource(p, size * sizeof(T)); auto resource = make_wave_buffer_resource(p, size * sizeof(T));
return raw_buffer_load<T>::apply(resource, offset * sizeof(T), 0, 0); return raw_buffer_load<T>::apply(resource, offset * sizeof(T), 0, 0);
} }
template<class T> template <class T>
__device__ void buffer_store(T data, T* p, index_int offset, index_int size, address_space::global) __device__ void buffer_store(T data, T* p, index_int offset, index_int size, address_space::global)
{ {
auto resource = make_wave_buffer_resource(p, size * sizeof(T)); auto resource = make_wave_buffer_resource(p, size * sizeof(T));
...@@ -134,18 +133,17 @@ __device__ void buffer_store(T data, T* p, index_int offset, index_int size, add ...@@ -134,18 +133,17 @@ __device__ void buffer_store(T data, T* p, index_int offset, index_int size, add
#endif #endif
template<class T, class AddressSpace> template <class T, class AddressSpace>
__device__ T buffer_load(const T* p, index_int offset, index_int, AddressSpace) __device__ T buffer_load(const T* p, index_int offset, index_int, AddressSpace)
{ {
return p[offset]; return p[offset];
} }
template<class T, class AddressSpace> template <class T, class AddressSpace>
__device__ void buffer_store(T data, T* p, index_int offset, index_int, AddressSpace) __device__ void buffer_store(T data, T* p, index_int offset, index_int, AddressSpace)
{ {
p[offset] = data; p[offset] = data;
} }
} // namespace migraphx } // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_BUFFER_ADDRESS_HPP #endif // MIGRAPHX_GUARD_KERNELS_BUFFER_ADDRESS_HPP
...@@ -41,8 +41,12 @@ using half2 = migraphx::vec<half, 2>; ...@@ -41,8 +41,12 @@ using half2 = migraphx::vec<half, 2>;
struct address_space struct address_space
{ {
struct global {}; struct global
struct local {}; {
};
struct local
{
};
}; };
} // namespace migraphx } // namespace migraphx
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment