Fix build on nmz: include <hip/hip_runtime.h> before the CUDA headers when __HIP_PLATFORM_AMD__ is defined

Signed-off-by: wenjh <wenjh@sugon.com>

Fix build on nmz: include <hip/hip_runtime.h> before the CUDA headers when __HIP_PLATFORM_AMD__ is defined
Signed-off-by: wenjh <wenjh@sugon.com>
0fce42f7 · wenjh · 13123839 · 0fce42f7 · 0fce42f7 · 0fce42f7
Commit 0fce42f7 authored Jan 12, 2026 by wenjh
9 changed files
--- a/tests/cpp/operator/test_cast_float8blockwise.cu
+++ b/tests/cpp/operator/test_cast_float8blockwise.cu
@@ -4,6 +4,9 @@
 * See LICENSE for license information.
 ************************************************************************/
+#ifdef __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime.h>
+#endif
 #include <cuda_bf16.h>
 #include <cuda_fp8.h>
 #include <cuda_runtime.h>

--- a/tests/cpp/operator/test_cast_mxfp8.cu
+++ b/tests/cpp/operator/test_cast_mxfp8.cu
@@ -4,6 +4,9 @@
 * See LICENSE for license information.
 ************************************************************************/
+#ifdef __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime.h>
+#endif
 #include <cuda_bf16.h>
 #include <cuda_fp8.h>
 #include <cuda_runtime.h>

--- a/tests/cpp/operator/test_cast_mxfp8_gated_swiglu.cu
+++ b/tests/cpp/operator/test_cast_mxfp8_gated_swiglu.cu
@@ -4,6 +4,9 @@
 * See LICENSE for license information.
 ************************************************************************/
+#ifdef __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime.h>
+#endif
 #include <cuda_bf16.h>
 #include <cuda_fp8.h>
 #include <cuda_runtime.h>

--- a/tests/cpp/operator/test_dequantize_mxfp8.cu
+++ b/tests/cpp/operator/test_dequantize_mxfp8.cu
@@ -10,7 +10,9 @@
 #include <memory>
 #include <random>
 #include <limits>
+#ifdef __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime.h>
+#endif
 #include <cuda_bf16.h>
 #include <cuda_fp8.h>
 #include <cuda_runtime.h>

--- a/tests/cpp/test_common.h
+++ b/tests/cpp/test_common.h
@@ -15,6 +15,9 @@
 #endif
 #define FP4_TYPE_SUPPORTED (CUDA_VERSION >= 12080)
+#ifdef __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime.h>
+#endif
 #include <cuda_runtime_api.h>
 #include <cuda_bf16.h>
 #include <cuda_fp16.h>

--- a/transformer_engine/common/multi_tensor/adam.cu
+++ b/transformer_engine/common/multi_tensor/adam.cu
@@ -5,6 +5,9 @@
 ************************************************************************/
 #include <assert.h>
+#ifdef __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime.h>
+#endif
 #include <cuda_fp8.h>
 #include <transformer_engine/multi_tensor.h>
 #include <transformer_engine/transformer_engine.h>

--- a/transformer_engine/common/multi_tensor/compute_scale.cu
+++ b/transformer_engine/common/multi_tensor/compute_scale.cu
@@ -7,6 +7,9 @@
 #include <limits>
 // Stringstream is a big hammer, but I want to rely on operator<< for dtype.
 #include <assert.h>
+#ifdef __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime.h>
+#endif
 #include <cuda_fp8.h>
 #include <transformer_engine/multi_tensor.h>
 #include <transformer_engine/transformer_engine.h>

--- a/transformer_engine/common/multi_tensor/scale.cu
+++ b/transformer_engine/common/multi_tensor/scale.cu
@@ -5,6 +5,9 @@
 ************************************************************************/
 #include <assert.h>
+#ifdef __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime.h>
+#endif
 #include <cuda_fp8.h>
 // Stringstream is a big hammer, but I want to rely on operator<< for dtype.
 #include <transformer_engine/multi_tensor.h>

--- a/transformer_engine/common/multi_tensor/sgd.cu
+++ b/transformer_engine/common/multi_tensor/sgd.cu
@@ -5,6 +5,9 @@
 ************************************************************************/
 #include <assert.h>
+#ifdef __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime.h>
+#endif
 #include <cuda_fp8.h>
 #include <transformer_engine/multi_tensor.h>
 #include <transformer_engine/transformer_engine.h>