Add bf16 (bfloat16) support

9fed1f5d · zhuwenwen · 3f1166ab · 9fed1f5d · 9fed1f5d · 9fed1f5d
Commit 9fed1f5d authored Mar 23, 2024 by zhuwenwen
Showing 3 changed files with 9 additions and 9 deletions

csrc/attention/attention_dtypes.h +1 -1

csrc/attention/attention_kernels.cu +4 -4

csrc/cache_kernels.cu +4 -4

No files found.
--- a/csrc/attention/attention_dtypes.h
+++ b/csrc/attention/attention_dtypes.h
@@ -3,5 +3,5 @@
 #include "attention_generic.cuh"
 #include "dtype_float16.cuh"
 #include "dtype_float32.cuh"
-// #include "dtype_bfloat16.cuh"
+#include "dtype_bfloat16.cuh"
 // #include "dtype_fp8_e5m2.cuh"
--- a/csrc/attention/attention_kernels.cu
+++ b/csrc/attention/attention_kernels.cu
@@ -734,8 +734,8 @@ void paged_attention_v1(
      CALL_V1_LAUNCHER_BLOCK_SIZE(float, float, false);
    } else if (query.dtype() == at::ScalarType::Half) {
      CALL_V1_LAUNCHER_BLOCK_SIZE(uint16_t, uint16_t, false);
-    // } else if (query.dtype() == at::ScalarType::BFloat16) {
-    //   CALL_V1_LAUNCHER_BLOCK_SIZE(__nv_bfloat16, __nv_bfloat16, false);
+    } else if (query.dtype() == at::ScalarType::BFloat16) {
+      CALL_V1_LAUNCHER_BLOCK_SIZE(__nv_bfloat16, __nv_bfloat16, false);
    } else {
      TORCH_CHECK(false, "Unsupported data type: ", query.dtype());
    }
@@ -927,8 +927,8 @@ void paged_attention_v2(
      CALL_V2_LAUNCHER_BLOCK_SIZE(float, float, false);
    } else if (query.dtype() == at::ScalarType::Half) {
      CALL_V2_LAUNCHER_BLOCK_SIZE(uint16_t, uint16_t, false);
-    // } else if (query.dtype() == at::ScalarType::BFloat16) {
-    //   CALL_V2_LAUNCHER_BLOCK_SIZE(__nv_bfloat16, __nv_bfloat16, false);
+    } else if (query.dtype() == at::ScalarType::BFloat16) {
+      CALL_V2_LAUNCHER_BLOCK_SIZE(__nv_bfloat16, __nv_bfloat16, false);
    } else {
      TORCH_CHECK(false, "Unsupported data type: ", query.dtype());
    }

--- a/csrc/cache_kernels.cu
+++ b/csrc/cache_kernels.cu
@@ -13,10 +13,10 @@
 #include <map>
 #include <vector>

-// #ifdef USE_ROCM
-//   #include <hip/hip_bf16.h>
-//   typedef __hip_bfloat16 __nv_bfloat16;
-// #endif
+#ifdef USE_ROCM
+  #include <hip/hip_bf16.h>
+  typedef __hip_bfloat16 __nv_bfloat16;
+#endif

 void swap_blocks(
  torch::Tensor& src,