minor changes to print out correct information

55f47962 · Shucai Xiao · 69c5c485 · 55f47962 · 55f47962
Commit 55f47962 authored Apr 30, 2019 by Shucai Xiao
Hide whitespace changes
Inline Side-by-side

Showing with 20 additions and 7 deletions

src/include/migraphx/tensor_view.hpp src/include/migraphx/tensor_view.hpp +8 -2

src/targets/gpu/quant_gemm.cpp src/targets/gpu/quant_gemm.cpp +12 -5

No files found.
--- a/src/include/migraphx/tensor_view.hpp
+++ b/src/include/migraphx/tensor_view.hpp
@@ -12,6 +12,12 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+template<class T>
+T as_number(T x) { return x; } // generic case: hand the value back unchanged
+inline int32_t as_number(int8_t x) { return static_cast<int32_t>(x); } // widen: int8_t is typically a char type, which operator<< would print as a character
+inline uint32_t as_number(uint8_t x) { return static_cast<uint32_t>(x); } // widen to the declared return type (was a no-op cast back to uint8_t)
+
+
 template <class T>
 struct tensor_view
 {
@@ -130,10 +136,10 @@ struct tensor_view
    {
        if(!x.empty())
        {
-            os << x.front();
+            os << as_number(x.front());
            for(std::size_t i = 1; i < x.m_shape.elements(); i++)
            {
-                os << ", " << x.m_data[x.m_shape.index(i)];
+                os << ", " << as_number(x.m_data[x.m_shape.index(i)]);
            }
        }
        return os;

--- a/src/targets/gpu/quant_gemm.cpp
+++ b/src/targets/gpu/quant_gemm.cpp
@@ -90,23 +90,30 @@ argument miopen_quant_gemm::compute(context& ctx,
        assert(transa or (lda % 4 == 0));
        assert(!transb or (ldb % 4 == 0));

+        auto arg_0 = migraphx::gpu::from_gpu(args[0]);
+        auto arg_1 = migraphx::gpu::from_gpu(args[1]);
+        auto arg_2 = migraphx::gpu::from_gpu(args[2]);
+        std::cout << "arg_0 = " << arg_0 << std::endl;
+        std::cout << "arg_1 = " << arg_1 << std::endl;
+        std::cout << "arg_2 = " << arg_2 << std::endl;
+
        auto num_matrices = std::accumulate(
            out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
        if(num_matrices == 1)
        {
            generic_rocblas_gemm_ex(ctx.get_stream().get_rocblas(),
-                                    transb ? rocblas_operation_transpose : rocblas_operation_none,
                                    transa ? rocblas_operation_transpose : rocblas_operation_none,
-                                    n,
+                                    transb ? rocblas_operation_transpose : rocblas_operation_none,
                                    m,
+                                    n,
                                    k,
                                    &alpha_r,
-                                    to_pointer(args[1]),
-                                    rocblas_datatype_i8_r,
-                                    ldb,
                                    to_pointer(args[0]),
                                    rocblas_datatype_i8_r,
                                    lda,
+                                    to_pointer(args[1]),
+                                    rocblas_datatype_i8_r,
+                                    ldb,
                                    &beta_r,
                                    to_pointer(args[2]),
                                    rocblas_datatype_i32_r,