Address formatting issues and leftovers

728032d7 · Andriy Roshchenko · d74c053b
Commit 728032d7 authored Nov 19, 2024 by Andriy Roshchenko
4 changed files
--- a/example/01_gemm/gemm_dl_int4.cpp
+++ b/example/01_gemm/gemm_dl_int4.cpp
@@ -41,4 +41,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
 #include "run_gemm_example.inc"
 int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? 0 : -1); }
 #endif
\ No newline at end of file
--- a/example/01_gemm/gemm_xdl_int4.cpp
+++ b/example/01_gemm/gemm_xdl_int4.cpp
@@ -42,4 +42,4 @@ using ReferenceGemmInstance = ck::tensor_operation::host::
 #include "run_gemm_example.inc"
 int main(int argc, char* argv[]) { return (run_gemm_example(argc, argv) ? 0 : -1); }
 #endif
\ No newline at end of file
--- a/example/01_gemm/run_gemm_example.inc
+++ b/example/01_gemm/run_gemm_example.inc
@@ -381,17 +381,6 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
        {
            std::cout << std::fixed << std::setprecision(16);
-            // AccDataType a = ck::type_convert<AccDataType>(a_m_k(0, 10));
-            // AccDataType b = ck::type_convert<AccDataType>(b_k_n(0, 10));
-            // std::cout << "a(0,10): " << a << std::endl;
-            // std::cout << "b(0,10): " << b << std::endl;
-            // std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 0)) << std::endl;
-            // std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 1)) << std::endl;
-            // std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 2)) << std::endl;
-            // std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(0, 0)) << std::endl;
-            // std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(1, 0)) << std::endl;
-            // std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(2, 0)) << std::endl;
            AccDataType d = ck::type_convert<AccDataType>(c_m_n_device_result(0, 10));
            AccDataType h = ck::type_convert<AccDataType>(c_m_n_host_result(10, 0));
            std::cout << "device result: " << d << std::endl;

--- a/include/ck/utility/amd_ck_fp8.hpp
+++ b/include/ck/utility/amd_ck_fp8.hpp
@@ -264,13 +264,6 @@ static __device__ float cast_to_f32_from_f8(fp8_storage_t v)
 template <ck_fp8_interpretation_t interpret>
 static __device__ float2_t cast_to_f32x2_from_f8x2(fp8x2_storage_t v)
 {
-    // union
-    // {
-    //     unsigned int i32val;
-    //     unsigned short i16val[2];
-    // } val;
-    // val.i16val[0] = v;
    const auto i16val = bit_cast<uint16_t>(v);
    static_assert(interpret == CK_E4M3_FNUZ || interpret == CK_E4M3_OCP ||