Commit 99e60246 authored by wenjh
Browse files

Make release_v2.9 compile pass

parent cbb14a5f
@@ -130,7 +130,7 @@ void nvte_cublas_gemm(const NVTETensor A, const NVTETensor B, NVTETensor D, cons
*/
void nvte_cublas_gemm_v2(int transa, int transb, const float *alpha, const NVTETensor A,
const NVTETensor B, const float *beta, const NVTETensor C, NVTETensor D,
NVTETensor workspace, NVTEMatmulConfig config, cudaStream_t stream, bool nvte_use_hipblaslt, bool nvte_use_rocblas, int compute_stream_offset);
NVTETensor workspace, NVTEMatmulConfig config, cudaStream_t stream, bool nvte_use_hipblaslt = 0, bool nvte_use_rocblas = 0, int compute_stream_offset = 0);
/*! \brief Compute matrix multiplication of 2 matrices, potentially fused with other operations,
* allowing for using a scaling factor for the GEMM result and the accumulation input (deprecated)
......
@@ -14,7 +14,7 @@
#include "../util/logging.h"
#include "transformer_engine/transformer_engine.h"
#ifdef __HIP_PLATFORM_AMD__
#ifndef __HIP_PLATFORM_AMD__
namespace transformer_engine {
namespace {
constexpr uint32_t WARP_SIZE = 32;
......
@@ -228,7 +228,7 @@ at::Tensor convert_block_scaling_to_mxfp8_tensor(transformer_engine::TensorWrapp
// Allocate memory for swizzled mxfp8 scaling factors
const auto options = at::TensorOptions().dtype(torch::kByte).device(torch::kCUDA);
at::Tensor swizzled_scale_inv = at::empty(
std::vector<int64_t>{swizzled_scale_inv_first_dim, swizzled_scale_inv_last_dim}, options);
std::vector<int64_t>{static_cast<int64_t>(swizzled_scale_inv_first_dim), static_cast<int64_t>(swizzled_scale_inv_last_dim)}, options);
// Set rowwise scaling factors on output
void* const swizzled_scale_inv_dptr = getDataPtr(swizzled_scale_inv, 0);
NVTEShape swizzled_scale_inv_shape{};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment