Add OCP FP8 support in CK_TILE (#1829)

* Add OCP FP8 to CK_TILE
* Validate OCP FP8 in FMHA FWD under VALID=1

Add OCP FP8 support in CK_TILE (#1829)
* Add OCP FP8 to CK_TILE
* Validate OCP FP8 in FMHA FWD under VALID=1
35aebe59 · Andriy Roshchenko · GitHub · 39dc25a9 · 35aebe59 · 35aebe59
Unverified Commit 35aebe59 authored Jan 27, 2025 by Andriy Roshchenko Committed by GitHub Jan 27, 2025
5 changed files
--- a/example/ck_tile/01_fmha/CMakeLists.txt
+++ b/example/ck_tile/01_fmha/CMakeLists.txt
@@ -102,6 +102,11 @@ else()
  list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_APPENDKV_API=0)
 endif()

+# When CK_USE_OCP_FP8 is enabled, define CK_TILE_USE_OCP_FP8 for the FMHA forward example
+if(CK_USE_OCP_FP8)
+  list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8)
+endif()
+
 # Allow comparing floating points directly in order to check sentinel values
 list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-float-equal)
 list(APPEND EXAMPLE_FMHA_BWD_COMPILE_OPTIONS -Wno-float-equal)

--- a/include/ck_tile/core/config.hpp
+++ b/include/ck_tile/core/config.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

 #pragma once

 #if defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || \
-    defined(__gfx942__)
+    defined(__gfx942__) || defined(__gfx950__)
 #define __gfx9__
 #endif
-#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
+#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__)
 #define __gfx94__
 #endif
 #if defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__) || \
@@ -230,3 +230,15 @@
 #ifndef CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
 #define CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID 1
 #endif
+
+#ifndef __HIP_DEVICE_COMPILE__ // for host code
+#ifdef CK_TILE_USE_OCP_FP8
+#define CK_TILE_USE_OCP_FP8 1
+#else
+#define CK_TILE_USE_OCP_FP8 0
+#endif
+#elif defined(__gfx950__) || defined(__gfx12__) // for GPU code
+#define CK_TILE_USE_OCP_FP8 1
+#else // for GPU code
+#define CK_TILE_USE_OCP_FP8 0
+#endif
--- a/include/ck_tile/core/numeric/float8.hpp
+++ b/include/ck_tile/core/numeric/float8.hpp
--- a/include/ck_tile/core/numeric/half.hpp
+++ b/include/ck_tile/core/numeric/half.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

 #include "ck_tile/core/config.hpp"
 #include "ck_tile/core/utility/bit_cast.hpp"
@@ -236,10 +236,11 @@ struct numeric_traits<half_t>
    static constexpr uint16_t head_mask = 0xFC00;
    static constexpr uint16_t mant_mask = 0x3FF;
    static constexpr uint16_t exp_mask  = 0x1F;
-    static constexpr uint32_t Inf       = 0x7C00;
-    static constexpr uint32_t NegInf    = 0xFC00;
-    static constexpr uint32_t NaN       = 0x7C01;
-    static constexpr uint32_t Neg0      = 0x8000;
+    static constexpr uint16_t abs_mask  = 0x7FFF;
+    static constexpr uint16_t Inf       = 0x7C00;
+    static constexpr uint16_t NegInf    = 0xFC00;
+    static constexpr uint16_t NaN       = 0x7C01;
+    static constexpr uint16_t Neg0      = 0x8000;
    using bitwise_type                  = uint16_t;
 };


--- a/include/ck_tile/core/numeric/numeric.hpp
+++ b/include/ck_tile/core/numeric/numeric.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

 #pragma once

@@ -89,6 +89,7 @@ struct numeric_traits<float>
    static constexpr uint32_t head_mask = 0xFF800000;
    static constexpr uint32_t mant_mask = 0x7FFFFF;
    static constexpr uint32_t exp_mask  = 0xFF;
+    static constexpr uint32_t abs_mask  = 0x7FFFFFFF;
    static constexpr uint32_t Inf       = 0x7F800000;
    static constexpr uint32_t NegInf    = 0xFF800000;
    static constexpr uint32_t NaN       = 0x7F800001;