Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
TransformerEngine
Commits
bd05b0dc
Commit
bd05b0dc
authored
Jan 23, 2026
by
wenjh
Browse files
Merge branch 'develop_v2.10' into release_v2.10
parents
7aeb5a72
8fc9d8f1
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
15 additions
and
1 deletion
+15
-1
tests/cpp/test_common.cu
tests/cpp/test_common.cu
+11
-0
tests/cpp/test_common.h
tests/cpp/test_common.h
+3
-1
transformer_engine/common/transpose/quantize_transpose_vector_blockwise.cu
...e/common/transpose/quantize_transpose_vector_blockwise.cu
+1
-0
No files found.
tests/cpp/test_common.cu
View file @
bd05b0dc
...
@@ -21,6 +21,8 @@
...
@@ -21,6 +21,8 @@
#include <transformer_engine/transformer_engine.h>
#include <transformer_engine/transformer_engine.h>
#include "util/logging.h"
#include "util/logging.h"
#define FP4_TYPE_SUPPORTED (CUDA_VERSION >= 12080)
namespace
test
{
namespace
test
{
size_t
create_seed_from_tensor_name
(
const
std
::
string
&
tensor_name
)
{
size_t
create_seed_from_tensor_name
(
const
std
::
string
&
tensor_name
)
{
...
@@ -343,10 +345,15 @@ Tensor::Tensor(const std::string& name,
...
@@ -343,10 +345,15 @@ Tensor::Tensor(const std::string& name,
}
}
}
}
#if FP4_TYPE_SUPPORTED
const
DType
rowwise_type
=
(
scaling_mode
==
NVTE_NVFP4_1D_SCALING
)
?
DType
::
kFloat4E2M1
:
type
;
const
DType
rowwise_type
=
(
scaling_mode
==
NVTE_NVFP4_1D_SCALING
)
?
DType
::
kFloat4E2M1
:
type
;
const
DType
colwise_type
=
(
scaling_mode
==
NVTE_NVFP4_1D_SCALING
)
?
DType
::
kFloat4E2M1
:
type
;
const
DType
colwise_type
=
(
scaling_mode
==
NVTE_NVFP4_1D_SCALING
)
?
DType
::
kFloat4E2M1
:
type
;
tensor_
.
set_rowwise_data
(
dptr_rowwise
,
rowwise_type
,
shape
);
tensor_
.
set_rowwise_data
(
dptr_rowwise
,
rowwise_type
,
shape
);
tensor_
.
set_columnwise_data
(
dptr_columnwise
,
colwise_type
,
columnwise_shape
);
tensor_
.
set_columnwise_data
(
dptr_columnwise
,
colwise_type
,
columnwise_shape
);
#else
tensor_
.
set_rowwise_data
(
dptr_rowwise
,
type
,
shape
);
tensor_
.
set_columnwise_data
(
dptr_columnwise
,
type
,
columnwise_shape
);
#endif
if
(
isFp8Type
(
type
)
||
isFp4Type
(
type
))
{
if
(
isFp8Type
(
type
)
||
isFp4Type
(
type
))
{
if
(
scaling_mode
==
NVTE_DELAYED_TENSOR_SCALING
)
{
if
(
scaling_mode
==
NVTE_DELAYED_TENSOR_SCALING
)
{
...
@@ -1041,7 +1048,11 @@ bool isFp8Type(DType type) {
...
@@ -1041,7 +1048,11 @@ bool isFp8Type(DType type) {
}
}
bool
isFp4Type
(
DType
type
)
{
bool
isFp4Type
(
DType
type
)
{
#if FP4_TYPE_SUPPORTED
return
type
==
DType
::
kFloat4E2M1
;
return
type
==
DType
::
kFloat4E2M1
;
#else
return
false
;
#endif
}
}
int32_t
getDeviceComputeCapability
()
{
int32_t
getDeviceComputeCapability
()
{
...
...
tests/cpp/test_common.h
View file @
bd05b0dc
...
@@ -112,7 +112,9 @@ struct TypeInfo {
...
@@ -112,7 +112,9 @@ struct TypeInfo {
struct
Helper
{
struct
Helper
{
constexpr
static
DType
getType
()
{
constexpr
static
DType
getType
()
{
constexpr
int
i
=
static_cast
<
int
>
(
current
);
constexpr
int
i
=
static_cast
<
int
>
(
current
);
if
(
std
::
is_same
<
U
,
typename
std
::
tuple_element
<
i
,
types
>::
type
>::
value
)
{
if
constexpr
(
i
>=
std
::
tuple_size_v
<
types
>
)
{
return
DType
::
kNumTypes
;
}
else
if
(
std
::
is_same
<
U
,
typename
std
::
tuple_element
<
i
,
types
>::
type
>::
value
)
{
return
current
;
return
current
;
}
else
{
}
else
{
return
Helper
<
U
,
static_cast
<
DType
>
(
i
+
1
)
>::
getType
();
return
Helper
<
U
,
static_cast
<
DType
>
(
i
+
1
)
>::
getType
();
...
...
transformer_engine/common/transpose/quantize_transpose_vector_blockwise.cu
View file @
bd05b0dc
...
@@ -1519,6 +1519,7 @@ void quantize_transpose_vector_blockwise(const SimpleTensor& input, SimpleTensor
...
@@ -1519,6 +1519,7 @@ void quantize_transpose_vector_blockwise(const SimpleTensor& input, SimpleTensor
size_t
smem_bytes
=
kSMemSize_Colwise
*
sizeof
(
InputType
);
size_t
smem_bytes
=
kSMemSize_Colwise
*
sizeof
(
InputType
);
const
size_t
num_blocks_x
=
DIVUP
(
row_length
,
(
size_t
)(
block_len
/
2
));
const
size_t
num_blocks_x
=
DIVUP
(
row_length
,
(
size_t
)(
block_len
/
2
));
const
size_t
num_blocks_y
=
DIVUP
(
num_rows
,
(
size_t
)(
block_len
));
const
size_t
num_blocks_y
=
DIVUP
(
num_rows
,
(
size_t
)(
block_len
));
dim3
grid
(
num_blocks_x
,
num_blocks_y
,
1
);
if
(
smem_bytes
>=
48
*
1024
)
{
if
(
smem_bytes
>=
48
*
1024
)
{
cudaError_t
err
=
cudaFuncSetAttribute
(
cudaError_t
err
=
cudaFuncSetAttribute
(
(
const
void
*
)
&
block_scaled_1d_cast_transpose_kernel_colwise
<
(
const
void
*
)
&
block_scaled_1d_cast_transpose_kernel_colwise
<
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment