OpenDAS / TransformerEngine · Commits

Commit 2bb532fb
authored Feb 04, 2026 by wenjh

Remove dump code of tensorwise_int8_bgrad_kernel

Signed-off-by: wenjh <wenjh@sugon.com>

parent d2c77acc
Showing 2 changed files with 7 additions and 16 deletions (+7, -16):

    transformer_engine/common/gemm/cublaslt_gemm.cu    +2  -0
    transformer_engine/common/gemm/rocm_gemm.cu         +5  -16
transformer_engine/common/gemm/cublaslt_gemm.cu  (view file @ 2bb532fb)
@@ -1222,6 +1222,8 @@ void nvte_cublas_atomic_gemm(const NVTETensor A, const NVTETensor B, NVTETensor
  const char *NVTE_FORCE_ROCM_GEMM = std::getenv("NVTE_FORCE_ROCM_GEMM");
  const bool use_fp8 = is_fp8_dtype(inputA->data.dtype) || is_fp8_dtype(inputB->data.dtype);
  const char *NVTE_INT8_SIM_FP8_TENSORWISE = std::getenv("NVTE_INT8_SIM_FP8_TENSORWISE");
  if (NVTE_INT8_SIM_FP8_TENSORWISE != nullptr && NVTE_INT8_SIM_FP8_TENSORWISE[0] == '1' &&
      use_int8 && use_split_accumulator)
    nvte_use_hipblaslt = 1;
  if ((biasTensor->data.dptr != nullptr) || (outputGelu->data.dptr != nullptr) || (use_fp8) ||
      (NVTE_FORCE_ROCM_GEMM != nullptr && NVTE_FORCE_ROCM_GEMM[0] == '1') ||
      (nvte_use_hipblaslt) || (nvte_use_rocblas)) {
    cublas_gemm(inputA, inputB, outputD, biasTensor, outputGelu, m, n, k, lda, ldb, ldd,
                transa, transb, grad, wspace->data.dptr, wspace->data.shape[0], accumulate,
                use_split_accumulator, math_sm_count,
  ...
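
The hunk above follows the file's existing convention of treating an environment variable as a boolean switch when it is set and its first character is '1'; with NVTE_INT8_SIM_FP8_TENSORWISE enabled, int8 GEMMs with split accumulation are forced onto the hipBLASLt path (nvte_use_hipblaslt = 1). A minimal sketch of that check is shown below; env_flag_enabled is a hypothetical helper and is not part of the repository.

```cpp
#include <cstdlib>

// Hypothetical helper (not in the diff): true when the variable is set and begins with '1',
// matching how NVTE_FORCE_ROCM_GEMM and NVTE_INT8_SIM_FP8_TENSORWISE are tested above.
static bool env_flag_enabled(const char *name) {
  const char *value = std::getenv(name);       // nullptr when the variable is unset
  return value != nullptr && value[0] == '1';  // only the first character is inspected
}

// The added gate, rewritten with the helper (names taken from the hunk):
//   if (env_flag_enabled("NVTE_INT8_SIM_FP8_TENSORWISE") && use_int8 && use_split_accumulator)
//     nvte_use_hipblaslt = 1;
```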
transformer_engine/common/gemm/rocm_gemm.cu  (view file @ 2bb532fb)
@@ -1038,22 +1038,11 @@ void hipblaslt_gemm(const Tensor* inputA, const Tensor* inputB, Tensor* outputD,
                                 operationDesc, HIPBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD,
                                 &ld_gelumat, sizeof(ld_gelumat)));
  } else if (bias) {
    if (tensorwise_int8) {
      if (grad) {
        int batch_size = k;
        int output_dim = n;
        DType te_bias_dtype = get_transformer_engine_dtype_from_hipblaslt_dtype(bias_type);
        TRANSFORMER_ENGINE_TYPE_SWITCH_NON_FP8ONLY(te_bias_dtype, BType,
            detail::tensorwise_int8_bias_gradient_kernelLauncher<BType>(
                reinterpret_cast<const int8_t *>(B), reinterpret_cast<BType *>(bias_ptr),
                B_scale_inverse_float, batch_size, output_dim, stream););
      } else {
        NVTE_CHECK_HIPBLASLT(hipblasLtMatmulDescSetAttribute(
            operationDesc, HIPBLASLT_MATMUL_DESC_BIAS_DATA_TYPE, &bias_type, sizeof(bias_type)));
        epilogue = HIPBLASLT_EPILOGUE_BIAS;
        NVTE_CHECK_HIPBLASLT(hipblasLtMatmulDescSetAttribute(
            operationDesc, HIPBLASLT_MATMUL_DESC_BIAS_POINTER, &bias_ptr, sizeof(bias_ptr)));
      }
    } else {
      if (grad) {
        // grad output is always input B
  ...
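
In the tensorwise_int8 + grad branch of this hunk, the bias gradient is not produced by a hipBLASLt epilogue; instead TRANSFORMER_ENGINE_TYPE_SWITCH_NON_FP8ONLY dispatches on the bias dtype and calls detail::tensorwise_int8_bias_gradient_kernelLauncher<BType> with the int8 grad-output (B), the bias-gradient pointer, the tensorwise inverse scale, batch_size = k, output_dim = n, and the stream. The sketch below is an assumption about what such a launcher typically does (a per-column sum of the quantized grad-output, dequantized once by the inverse scale); the kernel and launcher names are hypothetical and a row-major [batch_size, output_dim] layout is assumed, since neither is visible from the diff.

```cpp
#include <hip/hip_runtime.h>
#include <cstdint>

// Hypothetical sketch, not the repository's kernel: reduce each column of an int8
// grad-output matrix over the batch dimension and rescale by the tensorwise inverse scale.
template <typename BiasT>
__global__ void bgrad_colsum_kernel(const int8_t *grad_out,  // assumed [batch_size, output_dim], row-major
                                    BiasT *bias_grad,        // [output_dim]
                                    float scale_inv, int batch_size, int output_dim) {
  int col = blockIdx.x * blockDim.x + threadIdx.x;
  if (col >= output_dim) return;
  int32_t acc = 0;  // accumulate int8 values in int32 to avoid overflow
  for (int row = 0; row < batch_size; ++row) {
    acc += static_cast<int32_t>(grad_out[static_cast<size_t>(row) * output_dim + col]);
  }
  bias_grad[col] = static_cast<BiasT>(static_cast<float>(acc) * scale_inv);
}

template <typename BiasT>
void bgrad_colsum_launcher(const int8_t *grad_out, BiasT *bias_grad, float scale_inv,
                           int batch_size, int output_dim, hipStream_t stream) {
  constexpr int kBlock = 256;
  const int grid = (output_dim + kBlock - 1) / kBlock;  // one thread per output column
  hipLaunchKernelGGL((bgrad_colsum_kernel<BiasT>), dim3(grid), dim3(kBlock), 0, stream,
                     grad_out, bias_grad, scale_inv, batch_size, output_dim);
}
```

Whether the real kernel uses a simple per-thread column loop like this or a block-level reduction is not determinable from the diff; the sketch only mirrors the launcher's argument list.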