OpenDAS / TransformerEngine · Commits

Commit 2bb532fb
authored Feb 04, 2026 by wenjh

Remove dump code of tensorwise_int8_bgrad_kernel

Signed-off-by: wenjh <wenjh@sugon.com>

parent d2c77acc
Showing 2 changed files with 7 additions and 16 deletions (+7, -16):

    transformer_engine/common/gemm/cublaslt_gemm.cu    +2  -0
    transformer_engine/common/gemm/rocm_gemm.cu         +5  -16
transformer_engine/common/gemm/cublaslt_gemm.cu  (view file @ 2bb532fb)
@@ -1222,6 +1222,8 @@ void nvte_cublas_atomic_gemm(const NVTETensor A, const NVTETensor B, NVTETensor
  const char *NVTE_FORCE_ROCM_GEMM = std::getenv("NVTE_FORCE_ROCM_GEMM");
  const bool use_fp8 = is_fp8_dtype(inputA->data.dtype) || is_fp8_dtype(inputB->data.dtype);
  const char *NVTE_INT8_SIM_FP8_TENSORWISE = std::getenv("NVTE_INT8_SIM_FP8_TENSORWISE");
  if (NVTE_INT8_SIM_FP8_TENSORWISE != nullptr && NVTE_INT8_SIM_FP8_TENSORWISE[0] == '1' &&
      use_int8 && use_split_accumulator)
    nvte_use_hipblaslt = 1;
  if ((biasTensor->data.dptr != nullptr) || (outputGelu->data.dptr != nullptr) || (use_fp8) ||
      (NVTE_FORCE_ROCM_GEMM != nullptr && NVTE_FORCE_ROCM_GEMM[0] == '1') ||
      (nvte_use_hipblaslt) || (nvte_use_rocblas)) {
    cublas_gemm(inputA, inputB, outputD, biasTensor, outputGelu, m, n, k, lda, ldb, ldd,
                transa, transb, grad, wspace->data.dptr, wspace->data.shape[0], accumulate,
                use_split_accumulator, math_sm_count,
  ...
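
The hunk above follows the file's existing convention of treating an environment variable as a boolean switch when it is set and its first character is '1'; with NVTE_INT8_SIM_FP8_TENSORWISE enabled, int8 GEMMs with split accumulation are forced onto the hipBLASLt path (nvte_use_hipblaslt = 1). A minimal sketch of that check is shown below; env_flag_enabled is a hypothetical helper and is not part of the repository.

```cpp
#include <cstdlib>

// Hypothetical helper (not in the diff): true when the variable is set and begins with '1',
// matching how NVTE_FORCE_ROCM_GEMM and NVTE_INT8_SIM_FP8_TENSORWISE are tested above.
static bool env_flag_enabled(const char *name) {
  const char *value = std::getenv(name);       // nullptr when the variable is unset
  return value != nullptr && value[0] == '1';  // only the first character is inspected
}

// The added gate, rewritten with the helper (names taken from the hunk):
//   if (env_flag_enabled("NVTE_INT8_SIM_FP8_TENSORWISE") && use_int8 && use_split_accumulator)
//     nvte_use_hipblaslt = 1;
```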
transformer_engine/common/gemm/rocm_gemm.cu  (view file @ 2bb532fb)
@@ -1038,22 +1038,11 @@ void hipblaslt_gemm(const Tensor* inputA, const Tensor* inputB, Tensor* outputD,
                                 operationDesc, HIPBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD,
                                 &ld_gelumat, sizeof(ld_gelumat)));
  } else if (bias) {
    if (tensorwise_int8) {
      if (grad) {
        int batch_size = k;
        int output_dim = n;
        DType te_bias_dtype = get_transformer_engine_dtype_from_hipblaslt_dtype(bias_type);
        TRANSFORMER_ENGINE_TYPE_SWITCH_NON_FP8ONLY(te_bias_dtype, BType,
            detail::tensorwise_int8_bias_gradient_kernelLauncher<BType>(
                reinterpret_cast<const int8_t *>(B), reinterpret_cast<BType *>(bias_ptr),
                B_scale_inverse_float, batch_size, output_dim, stream););
      } else {
        NVTE_CHECK_HIPBLASLT(hipblasLtMatmulDescSetAttribute(
            operationDesc, HIPBLASLT_MATMUL_DESC_BIAS_DATA_TYPE, &bias_type, sizeof(bias_type)));
        epilogue = HIPBLASLT_EPILOGUE_BIAS;
        NVTE_CHECK_HIPBLASLT(hipblasLtMatmulDescSetAttribute(
            operationDesc, HIPBLASLT_MATMUL_DESC_BIAS_POINTER, &bias_ptr, sizeof(bias_ptr)));
      }
    } else {
      if (grad) {
        // grad output is always input B
  ...
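
In the tensorwise_int8 + grad branch of this hunk, the bias gradient is not produced by a hipBLASLt epilogue; instead TRANSFORMER_ENGINE_TYPE_SWITCH_NON_FP8ONLY dispatches on the bias dtype and calls detail::tensorwise_int8_bias_gradient_kernelLauncher<BType> with the int8 grad-output (B), the bias-gradient pointer, the tensorwise inverse scale, batch_size = k, output_dim = n, and the stream. The sketch below is an assumption about what such a launcher typically does (a per-column sum of the quantized grad-output, dequantized once by the inverse scale); the kernel and launcher names are hypothetical and a row-major [batch_size, output_dim] layout is assumed, since neither is visible from the diff.

```cpp
#include <hip/hip_runtime.h>
#include <cstdint>

// Hypothetical sketch, not the repository's kernel: reduce each column of an int8
// grad-output matrix over the batch dimension and rescale by the tensorwise inverse scale.
template <typename BiasT>
__global__ void bgrad_colsum_kernel(const int8_t *grad_out,  // assumed [batch_size, output_dim], row-major
                                    BiasT *bias_grad,        // [output_dim]
                                    float scale_inv, int batch_size, int output_dim) {
  int col = blockIdx.x * blockDim.x + threadIdx.x;
  if (col >= output_dim) return;
  int32_t acc = 0;  // accumulate int8 values in int32 to avoid overflow
  for (int row = 0; row < batch_size; ++row) {
    acc += static_cast<int32_t>(grad_out[static_cast<size_t>(row) * output_dim + col]);
  }
  bias_grad[col] = static_cast<BiasT>(static_cast<float>(acc) * scale_inv);
}

template <typename BiasT>
void bgrad_colsum_launcher(const int8_t *grad_out, BiasT *bias_grad, float scale_inv,
                           int batch_size, int output_dim, hipStream_t stream) {
  constexpr int kBlock = 256;
  const int grid = (output_dim + kBlock - 1) / kBlock;  // one thread per output column
  hipLaunchKernelGGL((bgrad_colsum_kernel<BiasT>), dim3(grid), dim3(kBlock), 0, stream,
                     grad_out, bias_grad, scale_inv, batch_size, output_dim);
}
```

Whether the real kernel uses a simple per-thread column loop like this or a block-level reduction is not determinable from the diff; the sketch only mirrors the launcher's argument list.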