Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
f1fe1ce6
Commit
f1fe1ce6
authored
Oct 16, 2024
by
Andriy Roshchenko
Browse files
Merge branch 'andriy/lwpck-2243' into andriy/lwpck-2388
parents
f7e4a330
4a50b93a
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
47 additions
and
6 deletions
+47
-6
CMakePresets.json
CMakePresets.json
+43
-2
include/ck/utility/amd_ck_fp8.hpp
include/ck/utility/amd_ck_fp8.hpp
+4
-4
No files found.
CMakePresets.json
View file @
f1fe1ce6
...
@@ -11,7 +11,6 @@
...
@@ -11,7 +11,6 @@
"cacheVariables"
:
{
"cacheVariables"
:
{
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_EXPORT_COMPILE_COMMANDS"
:
"ON"
,
"CMAKE_EXPORT_COMPILE_COMMANDS"
:
"ON"
,
"GPU_TARGETS"
:
"gfx950"
,
"BUILD_DEV"
:
"ON"
,
"BUILD_DEV"
:
"ON"
,
"CMAKE_CXX_COMPILER"
:
"/opt/rocm/llvm/bin/clang++"
,
"CMAKE_CXX_COMPILER"
:
"/opt/rocm/llvm/bin/clang++"
,
"CMAKE_PREFIX_PATH"
:
"/opt/rocm"
"CMAKE_PREFIX_PATH"
:
"/opt/rocm"
...
@@ -32,6 +31,7 @@
...
@@ -32,6 +31,7 @@
"NONE"
:
""
"NONE"
:
""
},
},
"cacheVariables"
:
{
"cacheVariables"
:
{
"GPU_TARGETS"
:
"gfx950"
,
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_CXX_FLAGS"
:
"-O0 -ggdb"
"CMAKE_CXX_FLAGS"
:
"-O0 -ggdb"
}
}
...
@@ -39,8 +39,29 @@
...
@@ -39,8 +39,29 @@
{
{
"name"
:
"MI355-release"
,
"name"
:
"MI355-release"
,
"displayName"
:
"MI355 Release"
,
"displayName"
:
"MI355 Release"
,
"inherits"
:
"MI355-debug"
,
"inherits"
:
"linux-debug"
,
"cacheVariables"
:
{
"GPU_TARGETS"
:
"gfx950"
,
"CMAKE_BUILD_TYPE"
:
"Release"
,
"CMAKE_CXX_FLAGS"
:
"-O3"
}
},
{
"name"
:
"MI300X-release"
,
"displayName"
:
"MI300X Release"
,
"inherits"
:
"linux-debug"
,
"cacheVariables"
:
{
"GPU_TARGETS"
:
"gfx942"
,
"CMAKE_BUILD_TYPE"
:
"Release"
,
"CMAKE_CXX_FLAGS"
:
"-O3"
}
},
{
"name"
:
"MI250-release"
,
"displayName"
:
"MI250 Release"
,
"inherits"
:
"linux-debug"
,
"cacheVariables"
:
{
"cacheVariables"
:
{
"GPU_TARGETS"
:
"gfx90a"
,
"CMAKE_BUILD_TYPE"
:
"Release"
,
"CMAKE_BUILD_TYPE"
:
"Release"
,
"CMAKE_CXX_FLAGS"
:
"-O3"
"CMAKE_CXX_FLAGS"
:
"-O3"
}
}
...
@@ -76,6 +97,26 @@
...
@@ -76,6 +97,26 @@
"Release"
"Release"
],
],
"jobs"
:
128
"jobs"
:
128
},
{
"name"
:
"MI300X-release"
,
"displayName"
:
"MI300X"
,
"configurePreset"
:
"MI300X-release"
,
"description"
:
"Build Environment for MI300X Release."
,
"inherits"
:
[
"Release"
],
"jobs"
:
128
},
{
"name"
:
"MI250-release"
,
"displayName"
:
"MI250"
,
"configurePreset"
:
"MI250-release"
,
"description"
:
"Build Environment for MI250 Release."
,
"inherits"
:
[
"Release"
],
"jobs"
:
128
}
}
]
]
}
}
include/ck/utility/amd_ck_fp8.hpp
View file @
f1fe1ce6
...
@@ -276,7 +276,7 @@ struct f8_ocp_t
...
@@ -276,7 +276,7 @@ struct f8_ocp_t
__host__
explicit
operator
float
()
const
__host__
explicit
operator
float
()
const
#endif
#endif
{
{
#if
CK_FP8_CVT_FAST_PATH
#if
defined(__gfx950__) || defined(__gfx1200__) || defined(__gfx1201__)
return
fp8_impl
::
cast_to_f32_from_f8
<
default_interpret
>
(
this
->
data
);
return
fp8_impl
::
cast_to_f32_from_f8
<
default_interpret
>
(
this
->
data
);
#else
#else
return
fp8_impl
::
cast_from_f8
<
float
,
wm
,
we
,
false
>
(
return
fp8_impl
::
cast_from_f8
<
float
,
wm
,
we
,
false
>
(
...
@@ -290,7 +290,7 @@ struct f8_ocp_t
...
@@ -290,7 +290,7 @@ struct f8_ocp_t
__host__
explicit
operator
_Float16
()
const
__host__
explicit
operator
_Float16
()
const
#endif
#endif
{
{
#if
CK_FP8_CVT_FAST_PATH
#if
defined(__gfx950__) || defined(__gfx1200__) || defined(__gfx1201__)
return
static_cast
<
_Float16
>
(
fp8_impl
::
cast_to_f32_from_f8
<
default_interpret
>
(
this
->
data
));
return
static_cast
<
_Float16
>
(
fp8_impl
::
cast_to_f32_from_f8
<
default_interpret
>
(
this
->
data
));
#else
#else
return
fp8_impl
::
cast_from_f8
<
_Float16
,
wm
,
we
,
false
>
(
return
fp8_impl
::
cast_from_f8
<
_Float16
,
wm
,
we
,
false
>
(
...
@@ -322,7 +322,7 @@ struct bf8_ocp_t
...
@@ -322,7 +322,7 @@ struct bf8_ocp_t
__host__
explicit
operator
float
()
const
__host__
explicit
operator
float
()
const
#endif
#endif
{
{
#if
CK_FP8_CVT_FAST_PATH
#if
defined(__gfx950__) || defined(__gfx1200__) || defined(__gfx1201__)
return
fp8_impl
::
cast_to_f32_from_f8
<
default_interpret
>
(
this
->
data
);
return
fp8_impl
::
cast_to_f32_from_f8
<
default_interpret
>
(
this
->
data
);
#else
#else
return
fp8_impl
::
cast_from_f8
<
float
,
wm
,
we
,
false
>
(
return
fp8_impl
::
cast_from_f8
<
float
,
wm
,
we
,
false
>
(
...
@@ -336,7 +336,7 @@ struct bf8_ocp_t
...
@@ -336,7 +336,7 @@ struct bf8_ocp_t
__host__
explicit
operator
_Float16
()
const
__host__
explicit
operator
_Float16
()
const
#endif
#endif
{
{
#if
CK_FP8_CVT_FAST_PATH
#if
defined(__gfx950__) || defined(__gfx1200__) || defined(__gfx1201__)
return
static_cast
<
_Float16
>
(
fp8_impl
::
cast_to_f32_from_f8
<
default_interpret
>
(
this
->
data
));
return
static_cast
<
_Float16
>
(
fp8_impl
::
cast_to_f32_from_f8
<
default_interpret
>
(
this
->
data
));
#else
#else
return
fp8_impl
::
cast_from_f8
<
_Float16
,
wm
,
we
,
false
>
(
return
fp8_impl
::
cast_from_f8
<
_Float16
,
wm
,
we
,
false
>
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment