Commit 3a848f0d authored by Paul
Browse files

Merge branch 'develop' into doc2

parents 64e8e30a d1e945da
......@@ -78,8 +78,9 @@ inline auto gs_launch(hipStream_t stream, index_int n, index_int local = 1024)
index_int nglobal = std::min<index_int>(256, groups) * local;
return [=](auto f) {
launch(stream, nglobal, local)(
[=](auto idx) { idx.global_stride(n, [&](auto i) { gs_invoke(f, i, idx); }); });
launch(stream, nglobal, local)([=](auto idx) __device__ {
idx.global_stride(n, [&](auto i) { gs_invoke(f, i, idx); });
});
};
}
......
......@@ -95,7 +95,7 @@ inline auto mi_launch(hipStream_t stream, const hip_shape<N>& global, index_int
auto nglobal = global.index(nglobal_multi);
return [=](auto f) {
launch(stream, nglobal, nlocal)([=](auto idx) {
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
auto midx = make_multi_index(global, idx.global, nglobal_multi);
f(idx, midx.for_stride(global.lens));
});
......
......@@ -20,6 +20,15 @@ struct sum
}
};
// Binary function object that multiplies its two operands.
struct product
{
    // Returns lhs * rhs. The operand types may differ; the result type is
    // whatever operator* yields for that pair of types.
    template <class Lhs, class Rhs>
    MIGRAPHX_DEVICE_CONSTEXPR auto operator()(Lhs lhs, Rhs rhs) const
    {
        return lhs * rhs;
    }
};
struct id
{
template <class T>
......
......@@ -39,7 +39,12 @@ constexpr void visit_tensor_size(index_int n, F f)
f(std::integral_constant<index_int, 5>{});
break;
}
default: throw std::runtime_error("Unknown tensor size");
case 6:
{
f(std::integral_constant<index_int, 6>{});
break;
}
default: throw std::runtime_error("Tensor size dim out of range");
}
}
......
......@@ -24,7 +24,7 @@ void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument
auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(comp_shape);
gs_launch(stream, nelements, 256)([=](auto ii) {
gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
const size_t nb = 4;
auto idx = desc.multi(ii);
std::size_t i_m = idx[dim_1];
......@@ -55,7 +55,7 @@ void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument
auto* in_ptr = device_cast(input.data());
visit_tensor_size(out_lens.size(), [&](auto out_dim) {
hip_tensor_descriptor<out_dim> desc(comp_shape);
gs_launch(stream, nelements, 256)([=](auto ii) {
gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
const size_t nb = 4;
auto idx = desc.multi(ii);
std::size_t i_n = idx[dim_1];
......
......@@ -9,7 +9,7 @@ namespace device {
// Invokes the callable returned by nary(stream, result, arg) with a lambda
// that returns ::log of its to_hip_type-converted argument.
// NOTE(review): presumably nary applies the lambda to each element of arg and
// stores the value in result -- confirm against nary's definition.
void log(hipStream_t stream, const argument& result, const argument& arg)
{
    // Issue the launch exactly once, using the __device__-annotated lambda.
    nary(stream, result, arg)([](auto x) __device__ { return ::log(to_hip_type(x)); });
}
} // namespace device
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment