Commit 4922108e authored by wenjh
Browse files

Fix std::bad_alloc on test_float8blockwisetensor.py


Signed-off-by: wenjh <wenjh@sugon.com>
parent fca88163
...@@ -350,6 +350,12 @@ NVTEDType nvte_tensor_type(const NVTETensor tensor) {
  return static_cast<NVTEDType>(t->dtype());
}
// Because of a HIP compiler bug, we need to disable optimizations for this
// function when compiling for AMD GPUs; otherwise test_float8blockwisetensor.py
// fails with std::bad_alloc.
// TODO: remove this once the HIP compiler bug is fixed.
#ifdef __HIP_PLATFORM_AMD__
#pragma clang optimize off
#endif
NVTEShape nvte_make_shape(const size_t *data, size_t ndim) {
  NVTEShape ret;
  if (ndim == 0) {
...@@ -363,6 +369,12 @@ NVTEShape nvte_make_shape(const size_t *data, size_t ndim) {
  ret.ndim = ndim;
  return ret;
}
// Re-enable optimizations that were disabled above to work around the HIP
// compiler bug affecting test_float8blockwisetensor.py.
// TODO: remove this once the HIP compiler bug is fixed.
#ifdef __HIP_PLATFORM_AMD__
#pragma clang optimize on
#endif
NVTEShape nvte_tensor_shape(const NVTETensor tensor) {
  auto *t = transformer_engine::convertNVTETensor(tensor);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment