Merge branch 'develop_v2.7' into release_v2.7

8bb7aea1 · wenjh · bfcd6493 · 4922108e · 8bb7aea1
Commit 8bb7aea1 authored Sep 12, 2025 by wenjh
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 0 deletions

transformer_engine/common/transformer_engine.cpp transformer_engine/common/transformer_engine.cpp +12 -0

No files found.
--- a/transformer_engine/common/transformer_engine.cpp
+++ b/transformer_engine/common/transformer_engine.cpp
@@ -350,6 +350,12 @@ NVTEDType nvte_tensor_type(const NVTETensor tensor) {
  return static_cast<NVTEDType>(t->dtype());
 }

+// Because of a HIP compiler bug, we need to disable optimizations here
+// when compiling for AMD GPUs; the bug surfaces while running test_float8blockwisetensor.py.
+// TODO: remove this once the HIP compiler bug is fixed.
+#ifdef __HIP_PLATFORM_AMD__
+#pragma clang optimize off
+#endif
 NVTEShape nvte_make_shape(const size_t *data, size_t ndim) {
  NVTEShape ret;
  if (ndim == 0) {
@@ -363,6 +369,12 @@ NVTEShape nvte_make_shape(const size_t *data, size_t ndim) {
  ret.ndim = ndim;
  return ret;
 }
+// Re-enable optimizations after nvte_make_shape, which is compiled unoptimized on
+// AMD GPUs to work around a HIP compiler bug (seen with test_float8blockwisetensor.py).
+// TODO: remove this once the HIP compiler bug is fixed.
+#ifdef __HIP_PLATFORM_AMD__
+#pragma clang optimize on
+#endif

 NVTEShape nvte_tensor_shape(const NVTETensor tensor) {
  auto *t = transformer_engine::convertNVTETensor(tensor);