Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
6c2d60d3
"conda/vscode:/vscode.git/clone" did not exist on "908779b26d3daa03fb258443e89e96f4b81f90d7"
Commit
6c2d60d3
authored
Sep 25, 2023
by
Xinya Zhang
Browse files
hipMemcpy -> hipMemcpyWithStream
parent
7e71583f
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
47 additions
and
37 deletions
+47
-37
include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp
...pl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp
+6
-5
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp
...device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp
+5
-4
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp
...sor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp
+5
-4
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_kloop_v1.hpp
...ice/impl/device_grouped_mha_bwd_xdl_cshuffle_kloop_v1.hpp
+6
-4
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_kloop_v2.hpp
...ice/impl/device_grouped_mha_bwd_xdl_cshuffle_kloop_v2.hpp
+5
-4
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v1.hpp
...ice/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v1.hpp
+5
-4
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v2.hpp
...ice/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v2.hpp
+5
-4
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_fwd_xdl_cshuffle_v1.hpp
...pu/device/impl/device_grouped_mha_fwd_xdl_cshuffle_v1.hpp
+5
-4
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_fwd_xdl_cshuffle_v2.hpp
...pu/device/impl/device_grouped_mha_fwd_xdl_cshuffle_v2.hpp
+5
-4
No files found.
include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp
View file @
6c2d60d3
...
...
@@ -651,11 +651,12 @@ struct DeviceGroupedContractionMultipleD_Xdl_CShuffle
             }
         }
-        hipGetErrorString(hipMemcpy(arg.p_workspace_,
+        hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
             arg.contraction_multi_d_kernel_args_.data(),
             arg.contraction_multi_d_kernel_args_.size() * sizeof(ContractionMultiDKernelArg),
-            hipMemcpyHostToDevice));
+            hipMemcpyHostToDevice,
+            stream_config.stream_id_));
         float ave_time = 0;
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp
View file @
6c2d60d3
...
...
@@ -610,10 +610,11 @@ struct DeviceGroupedGemmSoftmaxGemmPermute_Xdl_CShuffle
             some_has_main_k_block_loop |= y;
         }
-        hipGetErrorString(hipMemcpy(arg.p_workspace_,
+        hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
             arg.group_kernel_args_.data(),
             arg.group_kernel_args_.size() * sizeof(GroupKernelArg),
-            hipMemcpyHostToDevice));
+            hipMemcpyHostToDevice,
+            stream_config.stream_id_));
         float ave_time = 0;
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp
View file @
6c2d60d3
...
...
@@ -543,10 +543,11 @@ struct DeviceGroupedGemm_Xdl : public DeviceGroupedGemm<ALayout,
         }
         hipGetErrorString(
-            hipMemcpy(arg.p_workspace_,
+            hipMemcpyWithStream(arg.p_workspace_,
                 arg.gemm_desc_kernel_arg_.data(),
                 arg.gemm_desc_kernel_arg_.size() * sizeof(GemmBiasTransKernelArg),
-                hipMemcpyHostToDevice));
+                hipMemcpyHostToDevice,
+                stream_config.stream_id_));
         float ave_time = 0;
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_kloop_v1.hpp
View file @
6c2d60d3
...
...
@@ -934,10 +934,11 @@ struct DeviceGroupedMultiheadAttentionBackward_Kloop_Xdl_CShuffle_V1
         //     some_has_main_k_block_loop |= y;
         // }
-        hipGetErrorString(hipMemcpy(arg.p_workspace_,
+        hipGetErrorString(hipMemcpyAsync(arg.p_workspace_,
             arg.group_kernel_args_.data(),
             arg.group_kernel_args_.size() * sizeof(GroupKernelArg),
-            hipMemcpyHostToDevice));
+            hipMemcpyHostToDevice,
+            stream_config.stream_id_));
         float ave_time = 0;
...
...
@@ -954,6 +955,7 @@ struct DeviceGroupedMultiheadAttentionBackward_Kloop_Xdl_CShuffle_V1
                 has_main_k_block_loop_,
                 Deterministic>;
+            std::cerr << "Calling kernel kernel_grouped_multihead_attention_backward_kloop_xdl_cshuffle_v1 LINE: " << __LINE__ << " arg.p_workspace_ = " << arg.p_workspace_ << std::endl;
             return launch_and_time_kernel(stream_config,
                 kernel,
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_kloop_v2.hpp
View file @
6c2d60d3
...
...
@@ -941,10 +941,11 @@ struct DeviceGroupedMultiheadAttentionBackward_Kloop_Xdl_CShuffle_V2
             some_has_main_k_block_loop |= y;
         }
-        hipGetErrorString(hipMemcpy(arg.p_workspace_,
+        hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
             arg.group_kernel_args_.data(),
             arg.group_kernel_args_.size() * sizeof(GroupKernelArg),
-            hipMemcpyHostToDevice));
+            hipMemcpyHostToDevice,
+            stream_config.stream_id_));
         float ave_time = 0;
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v1.hpp
View file @
6c2d60d3
...
...
@@ -955,10 +955,11 @@ struct DeviceGroupedMultiheadAttentionBackward_Qloop_Xdl_CShuffle_V1
         //     some_has_main_k_block_loop |= y;
         // }
-        hipGetErrorString(hipMemcpy(arg.p_workspace_,
+        hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
             arg.group_kernel_args_.data(),
             arg.group_kernel_args_.size() * sizeof(GroupKernelArg),
-            hipMemcpyHostToDevice));
+            hipMemcpyHostToDevice,
+            stream_config.stream_id_));
         float ave_time = 0;
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_bwd_xdl_cshuffle_qloop_v2.hpp
View file @
6c2d60d3
...
...
@@ -962,10 +962,11 @@ struct DeviceGroupedMultiheadAttentionBackward_Qloop_Xdl_CShuffle_V2
             some_has_main_k_block_loop |= y;
         }
-        hipGetErrorString(hipMemcpy(arg.p_workspace_,
+        hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
             arg.group_kernel_args_.data(),
             arg.group_kernel_args_.size() * sizeof(GroupKernelArg),
-            hipMemcpyHostToDevice));
+            hipMemcpyHostToDevice,
+            stream_config.stream_id_));
         float ave_time = 0;
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_fwd_xdl_cshuffle_v1.hpp
View file @
6c2d60d3
...
...
@@ -804,10 +804,11 @@ struct DeviceGroupedMultiheadAttentionForward_Xdl_CShuffle_V1
             some_has_main_k_block_loop |= y;
         }
-        hipGetErrorString(hipMemcpy(arg.p_workspace_,
+        hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
             arg.group_kernel_args_.data(),
             arg.group_kernel_args_.size() * sizeof(GroupKernelArg),
-            hipMemcpyHostToDevice));
+            hipMemcpyHostToDevice,
+            stream_config.stream_id_));
         float ave_time = 0;
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_mha_fwd_xdl_cshuffle_v2.hpp
View file @
6c2d60d3
...
...
@@ -826,10 +826,11 @@ struct DeviceGroupedMultiheadAttentionForward_Xdl_CShuffle_V2
             some_has_main_k_block_loop |= y;
         }
-        hipGetErrorString(hipMemcpy(arg.p_workspace_,
+        hipGetErrorString(hipMemcpyWithStream(arg.p_workspace_,
             arg.group_kernel_args_.data(),
             arg.group_kernel_args_.size() * sizeof(GroupKernelArg),
-            hipMemcpyHostToDevice));
+            hipMemcpyHostToDevice,
+            stream_config.stream_id_));
         float ave_time = 0;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment