Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
7fd0e649
"vscode:/vscode.git/clone" did not exist on "22827c8e82ca589b96e0144f5c74572877ae0e50"
Commit
7fd0e649
authored
Jun 25, 2022
by
Chaitanya Inumella
Browse files
Added the changes for the contraction adapted to the cuTENSOR library style
parent
667e52cc
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
127 additions
and
15 deletions
+127
-15
example/23_contraction/contraction_xdl_fp32.cpp
example/23_contraction/contraction_xdl_fp32.cpp
+95
-13
library/include/ck/library/host_tensor/device.hpp
library/include/ck/library/host_tensor/device.hpp
+1
-1
library/include/ck/library/host_tensor/host_tensor.hpp
library/include/ck/library/host_tensor/host_tensor.hpp
+1
-1
library/include/ck/library/host_tensor/host_tensor_generator.hpp
.../include/ck/library/host_tensor/host_tensor_generator.hpp
+30
-0
No files found.
example/23_contraction/contraction_xdl_fp32.cpp
View file @
7fd0e649
#include <iostream>
#include <iostream>
#include <fstream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
...
@@ -37,7 +38,8 @@ using AElementOp = ck::tensor_operation::element_wise::PassThrough;
...
@@ -37,7 +38,8 @@ using AElementOp = ck::tensor_operation::element_wise::PassThrough;
using
BElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
BElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
CElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
CElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
Default
;
//static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
// clang-format off
// clang-format off
using
DeviceOpInstance
=
ck
::
tensor_operation
::
device
::
using
DeviceOpInstance
=
ck
::
tensor_operation
::
device
::
...
@@ -48,6 +50,23 @@ using DeviceOpInstance = ck::tensor_operation::device::
...
@@ -48,6 +50,23 @@ using DeviceOpInstance = ck::tensor_operation::device::
DeviceContraction_Xdl_CShuffle
<
NumDimM
,
NumDimN
,
NumDimK
,
F32
,
F32
,
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
PassThrough
,
GemmDefault
,
1
,
256
,
256
,
128
,
16
,
4
,
4
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
4
,
4
,
1
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
4
,
4
,
1
,
1
,
1
,
S
<
1
,
16
,
1
,
16
>
,
4
>
;
DeviceContraction_Xdl_CShuffle
<
NumDimM
,
NumDimN
,
NumDimK
,
F32
,
F32
,
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
PassThrough
,
GemmDefault
,
1
,
256
,
256
,
128
,
16
,
4
,
4
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
4
,
4
,
1
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
4
,
4
,
1
,
1
,
1
,
S
<
1
,
16
,
1
,
16
>
,
4
>
;
// clang-format on
// clang-format on
template
<
typename
T
,
typename
Range
>
void
LogRangeToFile
(
std
::
ofstream
&
fs
,
Range
&&
range
,
std
::
string
delim
)
{
bool
first
=
true
;
for
(
auto
&&
v
:
range
)
{
if
(
first
)
first
=
false
;
else
fs
<<
delim
;
fs
<<
static_cast
<
T
>
(
v
);
}
return
;
}
// hardcoded for NumDimM == NumDimN == NumDimK == 2
// hardcoded for NumDimM == NumDimN == NumDimK == 2
template
<
ck
::
index_t
NumDimM
,
template
<
ck
::
index_t
NumDimM
,
ck
::
index_t
NumDimN
,
ck
::
index_t
NumDimN
,
...
@@ -197,7 +216,7 @@ using ReferenceOpInstance = ReferenceContraction_M2_N2_K2<NumDimM,
...
@@ -197,7 +216,7 @@ using ReferenceOpInstance = ReferenceContraction_M2_N2_K2<NumDimM,
int
main
(
int
argc
,
char
*
argv
[])
int
main
(
int
argc
,
char
*
argv
[])
{
{
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
1
;
int
init_method
=
3
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
if
(
argc
==
4
)
if
(
argc
==
4
)
...
@@ -209,21 +228,42 @@ int main(int argc, char* argv[])
...
@@ -209,21 +228,42 @@ int main(int argc, char* argv[])
else
else
{
{
printf
(
"arg1: verification (0=no, 1=yes)
\n
"
);
printf
(
"arg1: verification (0=no, 1=yes)
\n
"
);
printf
(
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)
\n
"
);
printf
(
"arg2: initialization (0=no init, 1=integer value, 2=decimal value
, 3=cutensor_style_init
)
\n
"
);
printf
(
"arg3: time kernel (0=no, 1=yes)
\n
"
);
printf
(
"arg3: time kernel (0=no, 1=yes)
\n
"
);
exit
(
0
);
exit
(
0
);
}
}
// A[M0, M1, K0, K1]
std
::
ofstream
tensorA
;
std
::
ofstream
tensorB
;
std
::
ofstream
tensorC
;
std
::
ofstream
tensorC_d
;
std
::
cout
<<
"RAND_MAX value is "
<<
RAND_MAX
<<
std
::
endl
;
#if 0
// a[m0, m1, k0, k1]
std::vector<ck::index_t> a_ms_ks_lengths{30, 128, 32, 64};
std::vector<ck::index_t> a_ms_ks_lengths{30, 128, 32, 64};
std
::
vector
<
ck
::
index_t
>
a_ms_ks_strides
{
524288
,
4096
,
128
,
1
};
//
std::vector<ck::index_t> a_ms_ks_strides{524288, 4096, 128, 1};
//
B[K
0,
K
1,
N
0,
N
1]
//
b[k
0,
k
1,
n
0,
n
1]
std::vector<ck::index_t> b_ks_ns_lengths{32, 64, 32, 64};
std::vector<ck::index_t> b_ks_ns_lengths{32, 64, 32, 64};
std
::
vector
<
ck
::
index_t
>
b_ks_ns_strides
{
128
,
1
,
524288
,
4096
};
//
std::vector<ck::index_t> b_ks_ns_strides{128, 1, 524288, 4096};
//
C[M
0,
M
1,
N
0,
N
1]
//
c[m
0,
m
1,
n
0,
n
1]
std::vector<ck::index_t> c_ms_ns_lengths{30, 128, 32, 64};
std::vector<ck::index_t> c_ms_ns_lengths{30, 128, 32, 64};
std
::
vector
<
ck
::
index_t
>
c_ms_ns_strides
{
524288
,
4096
,
128
,
1
};
//std::vector<ck::index_t> c_ms_ns_strides{524288, 4096, 128, 1};
#else
// a[m0, m1, k0, k1]
std
::
vector
<
ck
::
index_t
>
a_ms_ks_lengths
{
5
,
6
,
3
,
4
};
//std::vector<ck::index_t> a_ms_ks_strides{108,20,16,1};
// b[k0, k1, n0, n1]
std
::
vector
<
ck
::
index_t
>
b_ks_ns_lengths
{
3
,
4
,
3
,
4
};
//std::vector<ck::index_t> b_ks_ns_strides{16,1,108,20};
// c[m0, m1, n0, n1]
std
::
vector
<
ck
::
index_t
>
c_ms_ns_lengths
{
5
,
6
,
3
,
4
};
//std::vector<ck::index_t> c_ms_ns_strides{108,20,16,1};
#endif
#if 0
Tensor<ADataType> a_ms_ks(
Tensor<ADataType> a_ms_ks(
std::vector<std::size_t>(a_ms_ks_lengths.begin(), a_ms_ks_lengths.end()),
std::vector<std::size_t>(a_ms_ks_lengths.begin(), a_ms_ks_lengths.end()),
std::vector<std::size_t>(a_ms_ks_strides.begin(), a_ms_ks_strides.end()));
std::vector<std::size_t>(a_ms_ks_strides.begin(), a_ms_ks_strides.end()));
...
@@ -236,6 +276,16 @@ int main(int argc, char* argv[])
...
@@ -236,6 +276,16 @@ int main(int argc, char* argv[])
Tensor<CDataType> c_ms_ns_device_result(
Tensor<CDataType> c_ms_ns_device_result(
std::vector<std::size_t>(c_ms_ns_lengths.begin(), c_ms_ns_lengths.end()),
std::vector<std::size_t>(c_ms_ns_lengths.begin(), c_ms_ns_lengths.end()),
std::vector<std::size_t>(c_ms_ns_strides.begin(), c_ms_ns_strides.end()));
std::vector<std::size_t>(c_ms_ns_strides.begin(), c_ms_ns_strides.end()));
#else
Tensor
<
ADataType
>
a_ms_ks
(
std
::
vector
<
std
::
size_t
>
(
a_ms_ks_lengths
.
begin
(),
a_ms_ks_lengths
.
end
()));
Tensor
<
BDataType
>
b_ks_ns
(
std
::
vector
<
std
::
size_t
>
(
b_ks_ns_lengths
.
begin
(),
b_ks_ns_lengths
.
end
()));
Tensor
<
CDataType
>
c_ms_ns_host_result
(
std
::
vector
<
std
::
size_t
>
(
c_ms_ns_lengths
.
begin
(),
c_ms_ns_lengths
.
end
()));
Tensor
<
CDataType
>
c_ms_ns_device_result
(
std
::
vector
<
std
::
size_t
>
(
c_ms_ns_lengths
.
begin
(),
c_ms_ns_lengths
.
end
()));
#endif
std
::
cout
<<
"a_ms_ks: "
<<
a_ms_ks
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"a_ms_ks: "
<<
a_ms_ks
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"b_ks_ns: "
<<
b_ks_ns
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"b_ks_ns: "
<<
b_ks_ns
.
mDesc
<<
std
::
endl
;
...
@@ -252,6 +302,10 @@ int main(int argc, char* argv[])
...
@@ -252,6 +302,10 @@ int main(int argc, char* argv[])
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_3
<
ADataType
>
{
0.0
,
1.0
});
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_3
<
ADataType
>
{
0.0
,
1.0
});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_3
<
BDataType
>
{
-
0.5
,
0.5
});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_3
<
BDataType
>
{
-
0.5
,
0.5
});
break
;
break
;
case
3
:
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_cuTensor
<
ADataType
>
{});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_cuTensor
<
BDataType
>
{});
break
;
default:
default:
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_Sequential
<
0
>
{});
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_Sequential
<
0
>
{});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_Sequential
<
1
>
{});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_Sequential
<
1
>
{});
...
@@ -261,6 +315,10 @@ int main(int argc, char* argv[])
...
@@ -261,6 +315,10 @@ int main(int argc, char* argv[])
DeviceMem
b_ks_ns_device_buf
(
sizeof
(
BDataType
)
*
b_ks_ns
.
mDesc
.
GetElementSpace
());
DeviceMem
b_ks_ns_device_buf
(
sizeof
(
BDataType
)
*
b_ks_ns
.
mDesc
.
GetElementSpace
());
DeviceMem
c_ms_ns_device_buf
(
sizeof
(
CDataType
)
*
c_ms_ns_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
c_ms_ns_device_buf
(
sizeof
(
CDataType
)
*
c_ms_ns_device_result
.
mDesc
.
GetElementSpace
());
std
::
cout
<<
"Tensor A element space: "
<<
a_ms_ks
.
mDesc
.
GetElementSpace
()
<<
std
::
endl
;
std
::
cout
<<
"Tensor B element space: "
<<
b_ks_ns
.
mDesc
.
GetElementSpace
()
<<
std
::
endl
;
std
::
cout
<<
"Tensor C element space: "
<<
c_ms_ns_device_result
.
mDesc
.
GetElementSpace
()
<<
std
::
endl
;
a_ms_ks_device_buf
.
ToDevice
(
a_ms_ks
.
mData
.
data
());
a_ms_ks_device_buf
.
ToDevice
(
a_ms_ks
.
mData
.
data
());
b_ks_ns_device_buf
.
ToDevice
(
b_ks_ns
.
mData
.
data
());
b_ks_ns_device_buf
.
ToDevice
(
b_ks_ns
.
mData
.
data
());
...
@@ -278,11 +336,11 @@ int main(int argc, char* argv[])
...
@@ -278,11 +336,11 @@ int main(int argc, char* argv[])
static_cast
<
BDataType
*>
(
b_ks_ns_device_buf
.
GetDeviceBuffer
()),
static_cast
<
BDataType
*>
(
b_ks_ns_device_buf
.
GetDeviceBuffer
()),
static_cast
<
CDataType
*>
(
c_ms_ns_device_buf
.
GetDeviceBuffer
()),
static_cast
<
CDataType
*>
(
c_ms_ns_device_buf
.
GetDeviceBuffer
()),
a_ms_ks_lengths
,
a_ms_ks_lengths
,
a_ms_ks_strides
,
std
::
vector
<
ck
::
index_t
>
(
a_ms_ks
.
mDesc
.
mStrides
.
begin
(),
a_ms_ks
.
mDesc
.
mStrides
.
end
())
,
b_ks_ns_lengths
,
b_ks_ns_lengths
,
b_ks_ns_strides
,
std
::
vector
<
ck
::
index_t
>
(
b_ks_ns
.
mDesc
.
mStrides
.
begin
(),
b_ks_ns
.
mDesc
.
mStrides
.
end
())
,
c_ms_ns_lengths
,
c_ms_ns_lengths
,
c_ms_ns_strides
,
std
::
vector
<
ck
::
index_t
>
(
c_ms_ns_host_result
.
mDesc
.
mStrides
.
begin
(),
c_ms_ns_host_result
.
mDesc
.
mStrides
.
end
())
,
a_element_op
,
a_element_op
,
b_element_op
,
b_element_op
,
c_element_op
);
c_element_op
);
...
@@ -324,6 +382,17 @@ int main(int argc, char* argv[])
...
@@ -324,6 +382,17 @@ int main(int argc, char* argv[])
c_ms_ns_device_buf
.
FromDevice
(
c_ms_ns_device_result
.
mData
.
data
());
c_ms_ns_device_buf
.
FromDevice
(
c_ms_ns_device_result
.
mData
.
data
());
tensorA
.
open
(
"tensor_A.txt"
);
LogRangeToFile
<
ADataType
>
(
tensorA
,
a_ms_ks
.
mData
,
","
);
LogRangeAsType
<
ADataType
>
(
std
::
cout
<<
"Tensor A elements:
\n
"
,
a_ms_ks
.
mData
,
","
);
std
::
cout
<<
std
::
endl
;
tensorA
.
close
();
tensorB
.
open
(
"tensor_B.txt"
);
LogRangeToFile
<
BDataType
>
(
tensorB
,
b_ks_ns
.
mData
,
","
);
LogRangeAsType
<
BDataType
>
(
std
::
cout
<<
"Tensor B elements:
\n
"
,
b_ks_ns
.
mData
,
","
);
std
::
cout
<<
std
::
endl
;
tensorB
.
close
();
if
(
do_verification
)
if
(
do_verification
)
{
{
auto
ref_gemm
=
ReferenceOpInstance
{};
auto
ref_gemm
=
ReferenceOpInstance
{};
...
@@ -334,6 +403,19 @@ int main(int argc, char* argv[])
...
@@ -334,6 +403,19 @@ int main(int argc, char* argv[])
ref_invoker
.
Run
(
ref_argument
);
ref_invoker
.
Run
(
ref_argument
);
tensorC
.
open
(
"tensor_C_contraction_host_results.txt"
);
LogRangeToFile
<
CDataType
>
(
tensorC
,
c_ms_ns_host_result
.
mData
,
","
);
LogRangeAsType
<
CDataType
>
(
std
::
cout
<<
"Tensor C_host elements:
\n
"
,
c_ms_ns_host_result
.
mData
,
","
);
std
::
cout
<<
std
::
endl
;
tensorC
.
close
();
tensorC
.
open
(
"tensor_C_contraction_device_results.txt"
);
LogRangeToFile
<
CDataType
>
(
tensorC_d
,
c_ms_ns_device_result
.
mData
,
","
);
LogRangeAsType
<
CDataType
>
(
std
::
cout
<<
"Tensor C_device elements:
\n
"
,
c_ms_ns_device_result
.
mData
,
","
);
std
::
cout
<<
std
::
endl
;
tensorC
.
close
();
return
ck
::
utils
::
check_err
(
c_ms_ns_device_result
.
mData
,
c_ms_ns_host_result
.
mData
)
?
0
:
1
;
return
ck
::
utils
::
check_err
(
c_ms_ns_device_result
.
mData
,
c_ms_ns_host_result
.
mData
)
?
0
:
1
;
}
}
...
...
library/include/ck/library/host_tensor/device.hpp
View file @
7fd0e649
...
@@ -32,7 +32,7 @@ inline void hip_check_error(hipError_t x)
...
@@ -32,7 +32,7 @@ inline void hip_check_error(hipError_t x)
struct
DeviceMem
struct
DeviceMem
{
{
DeviceMem
()
=
de
fault
;
DeviceMem
()
=
de
lete
;
DeviceMem
(
std
::
size_t
mem_size
);
DeviceMem
(
std
::
size_t
mem_size
);
void
*
GetDeviceBuffer
();
void
*
GetDeviceBuffer
();
std
::
size_t
GetBufferSize
();
std
::
size_t
GetBufferSize
();
...
...
library/include/ck/library/host_tensor/host_tensor.hpp
View file @
7fd0e649
...
@@ -109,7 +109,7 @@ struct HostTensorDescriptor
...
@@ -109,7 +109,7 @@ struct HostTensorDescriptor
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
HostTensorDescriptor
&
desc
);
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
HostTensorDescriptor
&
desc
);
private:
//
private:
std
::
vector
<
std
::
size_t
>
mLens
;
std
::
vector
<
std
::
size_t
>
mLens
;
std
::
vector
<
std
::
size_t
>
mStrides
;
std
::
vector
<
std
::
size_t
>
mStrides
;
};
};
...
...
library/include/ck/library/host_tensor/host_tensor_generator.hpp
View file @
7fd0e649
...
@@ -74,6 +74,7 @@ struct GeneratorTensor_2<ck::bhalf_t>
...
@@ -74,6 +74,7 @@ struct GeneratorTensor_2<ck::bhalf_t>
ck
::
bhalf_t
operator
()(
Is
...)
ck
::
bhalf_t
operator
()(
Is
...)
{
{
float
tmp
=
(
std
::
rand
()
%
(
max_value
-
min_value
))
+
min_value
;
float
tmp
=
(
std
::
rand
()
%
(
max_value
-
min_value
))
+
min_value
;
std
::
cout
<<
tmp
<<
","
;
return
ck
::
type_convert
<
ck
::
bhalf_t
>
(
tmp
);
return
ck
::
type_convert
<
ck
::
bhalf_t
>
(
tmp
);
}
}
};
};
...
@@ -123,6 +124,35 @@ struct GeneratorTensor_3<ck::bhalf_t>
...
@@ -123,6 +124,35 @@ struct GeneratorTensor_3<ck::bhalf_t>
}
}
};
};
template
<
typename
T
>
struct
GeneratorTensor_cuTensor
{
//int min_value = 0;
//int max_value = 1;
template
<
typename
...
Is
>
T
operator
()(
Is
...)
{
float
tmp
=
((
float
(
std
::
rand
()))
/
RAND_MAX
-
0.5
)
*
100
;
return
static_cast
<
T
>
(
tmp
);
}
};
template
<
>
struct
GeneratorTensor_cuTensor
<
ck
::
bhalf_t
>
{
//int min_value = 0;
//int max_value = 1;
template
<
typename
...
Is
>
ck
::
bhalf_t
operator
()(
Is
...)
{
float
tmp
=
((
float
(
std
::
rand
()))
/
RAND_MAX
-
0.5
)
*
100
;
return
ck
::
type_convert
<
ck
::
bhalf_t
>
(
tmp
);
}
};
struct
GeneratorTensor_Checkboard
struct
GeneratorTensor_Checkboard
{
{
template
<
typename
...
Ts
>
template
<
typename
...
Ts
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment