Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
7c63aa5f
Commit
7c63aa5f
authored
Jul 31, 2024
by
Harisankar Sadasivan
Browse files
changes for debug
parent
fe15fcc0
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
19 additions
and
19 deletions
+19
-19
example/01_gemm/run_gemm_example_streamk_v2.inc
example/01_gemm/run_gemm_example_streamk_v2.inc
+14
-14
include/ck/host_utility/kernel_launch.hpp
include/ck/host_utility/kernel_launch.hpp
+1
-1
include/ck/stream_config.hpp
include/ck/stream_config.hpp
+2
-2
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_streamk_v3.hpp
...ration/gpu/grid/gridwise_gemm_xdl_cshuffle_streamk_v3.hpp
+1
-1
script/cmake-ck-dev.sh
script/cmake-ck-dev.sh
+1
-1
No files found.
example/01_gemm/run_gemm_example_streamk_v2.inc
View file @
7c63aa5f
...
@@ -267,7 +267,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -267,7 +267,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
auto
ref_argument
=
ref_gemm
.
MakeArgument
(
auto
ref_argument
=
ref_gemm
.
MakeArgument
(
a_m_k
,
b_k_n
,
c_m_n_host_result
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
a_m_k
,
b_k_n
,
c_m_n_host_result
,
PassThrough
{},
PassThrough
{},
PassThrough
{});
printf
(
"inside do verification
\n
"
);
ref_invoker
.
Run
(
ref_argument
);
ref_invoker
.
Run
(
ref_argument
);
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
false
,
1
});
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
false
,
1
});
...
@@ -281,19 +281,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -281,19 +281,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
return
ck
::
utils
::
check_err
(
c_m_n_device_result_converted
,
c_m_n_host_result
);
return
ck
::
utils
::
check_err
(
c_m_n_device_result_converted
,
c_m_n_host_result
);
#else
#else
printf
(
"device copy initiated
\n
"
);
// HS
printf
(
"device copy initiated
\n
"
);
// HS
if
((
workspace_size
!=
0
)
&&
(
Streamk_sel
>
0
))
//
if((workspace_size != 0) && (Streamk_sel > 0))
{
//
{
printf
(
"entered if
\n
"
);
//
printf("entered if\n");
workspace
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
//
workspace.FromDevice(c_m_n_device_result.mData.data());
}
//
}
else
//
else
c_m_n_device_buf
.
FromDevice
(
c_m_n_device_result
.
mData
.
data
());
//
c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data());
printf
(
"device copy finished
\n
"
);
// HS
//
printf("device copy finished\n"); // HS
pass
&=
ck
::
utils
::
check_err
(
c_m_n_device_result
,
//
pass &= ck::utils::check_err(c_m_n_device_result,
c_m_n_host_result
,
//
c_m_n_host_result,
"Error: Incorrect results!"
,
//
"Error: Incorrect results!",
get_rtol
<
CDataType
>
(),
//
get_rtol<CDataType>(),
get_atol
<
CDataType
>
());
//
get_atol<CDataType>());
#endif
#endif
}
}
...
...
include/ck/host_utility/kernel_launch.hpp
View file @
7c63aa5f
...
@@ -109,7 +109,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
...
@@ -109,7 +109,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
printf
(
"Warm up %d times
\n
"
,
stream_config
.
cold_niters_
);
printf
(
"Warm up %d times
\n
"
,
stream_config
.
cold_niters_
);
}
}
// warm up
// warm up
preprocess
();
//HS
preprocess();
for
(
int
i
=
0
;
i
<
stream_config
.
cold_niters_
;
++
i
)
for
(
int
i
=
0
;
i
<
stream_config
.
cold_niters_
;
++
i
)
{
{
kernel
<<<
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
>>>
(
args
...);
kernel
<<<
grid_dim
,
block_dim
,
lds_byte
,
stream_config
.
stream_id_
>>>
(
args
...);
...
...
include/ck/stream_config.hpp
View file @
7c63aa5f
...
@@ -11,8 +11,8 @@ struct StreamConfig
...
@@ -11,8 +11,8 @@ struct StreamConfig
hipStream_t
stream_id_
=
nullptr
;
hipStream_t
stream_id_
=
nullptr
;
bool
time_kernel_
=
false
;
bool
time_kernel_
=
false
;
int
log_level_
=
0
;
int
log_level_
=
0
;
int
cold_niters_
=
5
;
int
cold_niters_
=
0
;
//HS
int
nrepeat_
=
50
;
int
nrepeat_
=
1
;
//HS
bool
flush_cache
=
false
;
bool
flush_cache
=
false
;
int
rotating_count
=
1
;
int
rotating_count
=
1
;
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_streamk_v3.hpp
View file @
7c63aa5f
...
@@ -1186,7 +1186,6 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
...
@@ -1186,7 +1186,6 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
void
*
p_workspace
,
void
*
p_workspace
,
Block2CTileMap_streamk
block_2_ctile_map_streamk
)
Block2CTileMap_streamk
block_2_ctile_map_streamk
)
{
{
const
AElementwiseOperation
a_element_op
{};
const
AElementwiseOperation
a_element_op
{};
const
BElementwiseOperation
b_element_op
{};
const
BElementwiseOperation
b_element_op
{};
const
CElementwiseOperation
c_element_op
{};
const
CElementwiseOperation
c_element_op
{};
...
@@ -1899,6 +1898,7 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
...
@@ -1899,6 +1898,7 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
}
}
}
}
}
// for loop
}
// for loop
if
(
threadIdx
.
x
==
0
)
printf
(
"kernel ends"
);
}
}
template
<
bool
HasMainKBlockLoop
,
template
<
bool
HasMainKBlockLoop
,
...
...
script/cmake-ck-dev.sh
View file @
7c63aa5f
...
@@ -8,7 +8,7 @@ MY_PROJECT_SOURCE=$1
...
@@ -8,7 +8,7 @@ MY_PROJECT_SOURCE=$1
if
[
$#
-ge
2
]
;
then
if
[
$#
-ge
2
]
;
then
GPU_TARGETS
=
$2
GPU_TARGETS
=
$2
else
else
GPU_TARGETS
=
"gfx90
8;gfx90a;gfx940
"
GPU_TARGETS
=
"gfx90
a
"
fi
fi
cmake
\
cmake
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment