Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
7fd0e649
Commit
7fd0e649
authored
Jun 25, 2022
by
Chaitanya Inumella
Browse files
Added the changes for the contraction adapted to the cuTENSOR library style
parent
667e52cc
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
127 additions
and
15 deletions
+127
-15
example/23_contraction/contraction_xdl_fp32.cpp
example/23_contraction/contraction_xdl_fp32.cpp
+95
-13
library/include/ck/library/host_tensor/device.hpp
library/include/ck/library/host_tensor/device.hpp
+1
-1
library/include/ck/library/host_tensor/host_tensor.hpp
library/include/ck/library/host_tensor/host_tensor.hpp
+1
-1
library/include/ck/library/host_tensor/host_tensor_generator.hpp
.../include/ck/library/host_tensor/host_tensor_generator.hpp
+30
-0
No files found.
example/23_contraction/contraction_xdl_fp32.cpp
View file @
7fd0e649
#include <iostream>
#include <iostream>
#include <fstream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <cstdlib>
...
@@ -37,7 +38,8 @@ using AElementOp = ck::tensor_operation::element_wise::PassThrough;
...
@@ -37,7 +38,8 @@ using AElementOp = ck::tensor_operation::element_wise::PassThrough;
using
BElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
BElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
CElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
using
CElementOp
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
Default
;
//static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
MNKPadding
;
// clang-format off
// clang-format off
using
DeviceOpInstance
=
ck
::
tensor_operation
::
device
::
using
DeviceOpInstance
=
ck
::
tensor_operation
::
device
::
...
@@ -48,6 +50,23 @@ using DeviceOpInstance = ck::tensor_operation::device::
...
@@ -48,6 +50,23 @@ using DeviceOpInstance = ck::tensor_operation::device::
DeviceContraction_Xdl_CShuffle
<
NumDimM
,
NumDimN
,
NumDimK
,
F32
,
F32
,
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
PassThrough
,
GemmDefault
,
1
,
256
,
256
,
128
,
16
,
4
,
4
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
4
,
4
,
1
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
4
,
4
,
1
,
1
,
1
,
S
<
1
,
16
,
1
,
16
>
,
4
>
;
DeviceContraction_Xdl_CShuffle
<
NumDimM
,
NumDimN
,
NumDimK
,
F32
,
F32
,
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
PassThrough
,
GemmDefault
,
1
,
256
,
256
,
128
,
16
,
4
,
4
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
4
,
4
,
1
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
4
,
4
,
1
,
1
,
1
,
S
<
1
,
16
,
1
,
16
>
,
4
>
;
// clang-format on
// clang-format on
/// Write every element of a range to an output stream, each value first
/// converted to T, with `delim` between consecutive elements (no leading or
/// trailing delimiter, no newline).
///
/// Generalized to take std::ostream& instead of std::ofstream& so any output
/// stream (file, std::cout, std::ostringstream, ...) works; existing callers
/// passing an std::ofstream are unaffected.
///
/// @tparam T     type each element is cast to before printing
/// @tparam Range any range usable in a range-for loop
/// @param fs     destination stream
/// @param range  values to print
/// @param delim  separator written between consecutive elements
///               (passed by const reference to avoid a per-call copy)
template <typename T, typename Range>
void LogRangeToFile(std::ostream& fs, Range&& range, const std::string& delim)
{
    bool first = true;
    for(auto&& v : range)
    {
        if(first)
            first = false;
        else
            fs << delim;
        fs << static_cast<T>(v);
    }
}
// hardcoded for NumDimM == NumDimN == NumDimK == 2
// hardcoded for NumDimM == NumDimN == NumDimK == 2
template
<
ck
::
index_t
NumDimM
,
template
<
ck
::
index_t
NumDimM
,
ck
::
index_t
NumDimN
,
ck
::
index_t
NumDimN
,
...
@@ -197,7 +216,7 @@ using ReferenceOpInstance = ReferenceContraction_M2_N2_K2<NumDimM,
...
@@ -197,7 +216,7 @@ using ReferenceOpInstance = ReferenceContraction_M2_N2_K2<NumDimM,
int
main
(
int
argc
,
char
*
argv
[])
int
main
(
int
argc
,
char
*
argv
[])
{
{
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
1
;
int
init_method
=
3
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
if
(
argc
==
4
)
if
(
argc
==
4
)
...
@@ -209,21 +228,42 @@ int main(int argc, char* argv[])
...
@@ -209,21 +228,42 @@ int main(int argc, char* argv[])
else
else
{
{
printf
(
"arg1: verification (0=no, 1=yes)
\n
"
);
printf
(
"arg1: verification (0=no, 1=yes)
\n
"
);
printf
(
"arg2: initialization (0=no init, 1=integer value, 2=decimal value)
\n
"
);
printf
(
"arg2: initialization (0=no init, 1=integer value, 2=decimal value
, 3=cutensor_style_init
)
\n
"
);
printf
(
"arg3: time kernel (0=no, 1=yes)
\n
"
);
printf
(
"arg3: time kernel (0=no, 1=yes)
\n
"
);
exit
(
0
);
exit
(
0
);
}
}
// A[M0, M1, K0, K1]
std
::
ofstream
tensorA
;
std
::
ofstream
tensorB
;
std
::
ofstream
tensorC
;
std
::
ofstream
tensorC_d
;
std
::
cout
<<
"RAND_MAX value is "
<<
RAND_MAX
<<
std
::
endl
;
#if 0
// a[m0, m1, k0, k1]
std::vector<ck::index_t> a_ms_ks_lengths{30, 128, 32, 64};
std::vector<ck::index_t> a_ms_ks_lengths{30, 128, 32, 64};
std
::
vector
<
ck
::
index_t
>
a_ms_ks_strides
{
524288
,
4096
,
128
,
1
};
//
std::vector<ck::index_t> a_ms_ks_strides{524288, 4096, 128, 1};
//
B[K
0,
K
1,
N
0,
N
1]
//
b[k
0,
k
1,
n
0,
n
1]
std::vector<ck::index_t> b_ks_ns_lengths{32, 64, 32, 64};
std::vector<ck::index_t> b_ks_ns_lengths{32, 64, 32, 64};
std
::
vector
<
ck
::
index_t
>
b_ks_ns_strides
{
128
,
1
,
524288
,
4096
};
//
std::vector<ck::index_t> b_ks_ns_strides{128, 1, 524288, 4096};
//
C[M
0,
M
1,
N
0,
N
1]
//
c[m
0,
m
1,
n
0,
n
1]
std::vector<ck::index_t> c_ms_ns_lengths{30, 128, 32, 64};
std::vector<ck::index_t> c_ms_ns_lengths{30, 128, 32, 64};
std
::
vector
<
ck
::
index_t
>
c_ms_ns_strides
{
524288
,
4096
,
128
,
1
};
//std::vector<ck::index_t> c_ms_ns_strides{524288, 4096, 128, 1};
#else
// a[m0, m1, k0, k1]
std
::
vector
<
ck
::
index_t
>
a_ms_ks_lengths
{
5
,
6
,
3
,
4
};
//std::vector<ck::index_t> a_ms_ks_strides{108,20,16,1};
// b[k0, k1, n0, n1]
std
::
vector
<
ck
::
index_t
>
b_ks_ns_lengths
{
3
,
4
,
3
,
4
};
//std::vector<ck::index_t> b_ks_ns_strides{16,1,108,20};
// c[m0, m1, n0, n1]
std
::
vector
<
ck
::
index_t
>
c_ms_ns_lengths
{
5
,
6
,
3
,
4
};
//std::vector<ck::index_t> c_ms_ns_strides{108,20,16,1};
#endif
#if 0
Tensor<ADataType> a_ms_ks(
Tensor<ADataType> a_ms_ks(
std::vector<std::size_t>(a_ms_ks_lengths.begin(), a_ms_ks_lengths.end()),
std::vector<std::size_t>(a_ms_ks_lengths.begin(), a_ms_ks_lengths.end()),
std::vector<std::size_t>(a_ms_ks_strides.begin(), a_ms_ks_strides.end()));
std::vector<std::size_t>(a_ms_ks_strides.begin(), a_ms_ks_strides.end()));
...
@@ -236,6 +276,16 @@ int main(int argc, char* argv[])
...
@@ -236,6 +276,16 @@ int main(int argc, char* argv[])
Tensor<CDataType> c_ms_ns_device_result(
Tensor<CDataType> c_ms_ns_device_result(
std::vector<std::size_t>(c_ms_ns_lengths.begin(), c_ms_ns_lengths.end()),
std::vector<std::size_t>(c_ms_ns_lengths.begin(), c_ms_ns_lengths.end()),
std::vector<std::size_t>(c_ms_ns_strides.begin(), c_ms_ns_strides.end()));
std::vector<std::size_t>(c_ms_ns_strides.begin(), c_ms_ns_strides.end()));
#else
Tensor
<
ADataType
>
a_ms_ks
(
std
::
vector
<
std
::
size_t
>
(
a_ms_ks_lengths
.
begin
(),
a_ms_ks_lengths
.
end
()));
Tensor
<
BDataType
>
b_ks_ns
(
std
::
vector
<
std
::
size_t
>
(
b_ks_ns_lengths
.
begin
(),
b_ks_ns_lengths
.
end
()));
Tensor
<
CDataType
>
c_ms_ns_host_result
(
std
::
vector
<
std
::
size_t
>
(
c_ms_ns_lengths
.
begin
(),
c_ms_ns_lengths
.
end
()));
Tensor
<
CDataType
>
c_ms_ns_device_result
(
std
::
vector
<
std
::
size_t
>
(
c_ms_ns_lengths
.
begin
(),
c_ms_ns_lengths
.
end
()));
#endif
std
::
cout
<<
"a_ms_ks: "
<<
a_ms_ks
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"a_ms_ks: "
<<
a_ms_ks
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"b_ks_ns: "
<<
b_ks_ns
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"b_ks_ns: "
<<
b_ks_ns
.
mDesc
<<
std
::
endl
;
...
@@ -252,6 +302,10 @@ int main(int argc, char* argv[])
...
@@ -252,6 +302,10 @@ int main(int argc, char* argv[])
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_3
<
ADataType
>
{
0.0
,
1.0
});
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_3
<
ADataType
>
{
0.0
,
1.0
});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_3
<
BDataType
>
{
-
0.5
,
0.5
});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_3
<
BDataType
>
{
-
0.5
,
0.5
});
break
;
break
;
case
3
:
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_cuTensor
<
ADataType
>
{});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_cuTensor
<
BDataType
>
{});
break
;
default:
default:
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_Sequential
<
0
>
{});
a_ms_ks
.
GenerateTensorValue
(
GeneratorTensor_Sequential
<
0
>
{});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_Sequential
<
1
>
{});
b_ks_ns
.
GenerateTensorValue
(
GeneratorTensor_Sequential
<
1
>
{});
...
@@ -261,6 +315,10 @@ int main(int argc, char* argv[])
...
@@ -261,6 +315,10 @@ int main(int argc, char* argv[])
DeviceMem
b_ks_ns_device_buf
(
sizeof
(
BDataType
)
*
b_ks_ns
.
mDesc
.
GetElementSpace
());
DeviceMem
b_ks_ns_device_buf
(
sizeof
(
BDataType
)
*
b_ks_ns
.
mDesc
.
GetElementSpace
());
DeviceMem
c_ms_ns_device_buf
(
sizeof
(
CDataType
)
*
c_ms_ns_device_result
.
mDesc
.
GetElementSpace
());
DeviceMem
c_ms_ns_device_buf
(
sizeof
(
CDataType
)
*
c_ms_ns_device_result
.
mDesc
.
GetElementSpace
());
std
::
cout
<<
"Tensor A element space: "
<<
a_ms_ks
.
mDesc
.
GetElementSpace
()
<<
std
::
endl
;
std
::
cout
<<
"Tensor B element space: "
<<
b_ks_ns
.
mDesc
.
GetElementSpace
()
<<
std
::
endl
;
std
::
cout
<<
"Tensor C element space: "
<<
c_ms_ns_device_result
.
mDesc
.
GetElementSpace
()
<<
std
::
endl
;
a_ms_ks_device_buf
.
ToDevice
(
a_ms_ks
.
mData
.
data
());
a_ms_ks_device_buf
.
ToDevice
(
a_ms_ks
.
mData
.
data
());
b_ks_ns_device_buf
.
ToDevice
(
b_ks_ns
.
mData
.
data
());
b_ks_ns_device_buf
.
ToDevice
(
b_ks_ns
.
mData
.
data
());
...
@@ -278,11 +336,11 @@ int main(int argc, char* argv[])
...
@@ -278,11 +336,11 @@ int main(int argc, char* argv[])
static_cast
<
BDataType
*>
(
b_ks_ns_device_buf
.
GetDeviceBuffer
()),
static_cast
<
BDataType
*>
(
b_ks_ns_device_buf
.
GetDeviceBuffer
()),
static_cast
<
CDataType
*>
(
c_ms_ns_device_buf
.
GetDeviceBuffer
()),
static_cast
<
CDataType
*>
(
c_ms_ns_device_buf
.
GetDeviceBuffer
()),
a_ms_ks_lengths
,
a_ms_ks_lengths
,
a_ms_ks_strides
,
std
::
vector
<
ck
::
index_t
>
(
a_ms_ks
.
mDesc
.
mStrides
.
begin
(),
a_ms_ks
.
mDesc
.
mStrides
.
end
())
,
b_ks_ns_lengths
,
b_ks_ns_lengths
,
b_ks_ns_strides
,
std
::
vector
<
ck
::
index_t
>
(
b_ks_ns
.
mDesc
.
mStrides
.
begin
(),
b_ks_ns
.
mDesc
.
mStrides
.
end
())
,
c_ms_ns_lengths
,
c_ms_ns_lengths
,
c_ms_ns_strides
,
std
::
vector
<
ck
::
index_t
>
(
c_ms_ns_host_result
.
mDesc
.
mStrides
.
begin
(),
c_ms_ns_host_result
.
mDesc
.
mStrides
.
end
())
,
a_element_op
,
a_element_op
,
b_element_op
,
b_element_op
,
c_element_op
);
c_element_op
);
...
@@ -324,6 +382,17 @@ int main(int argc, char* argv[])
...
@@ -324,6 +382,17 @@ int main(int argc, char* argv[])
c_ms_ns_device_buf
.
FromDevice
(
c_ms_ns_device_result
.
mData
.
data
());
c_ms_ns_device_buf
.
FromDevice
(
c_ms_ns_device_result
.
mData
.
data
());
tensorA
.
open
(
"tensor_A.txt"
);
LogRangeToFile
<
ADataType
>
(
tensorA
,
a_ms_ks
.
mData
,
","
);
LogRangeAsType
<
ADataType
>
(
std
::
cout
<<
"Tensor A elements:
\n
"
,
a_ms_ks
.
mData
,
","
);
std
::
cout
<<
std
::
endl
;
tensorA
.
close
();
tensorB
.
open
(
"tensor_B.txt"
);
LogRangeToFile
<
BDataType
>
(
tensorB
,
b_ks_ns
.
mData
,
","
);
LogRangeAsType
<
BDataType
>
(
std
::
cout
<<
"Tensor B elements:
\n
"
,
b_ks_ns
.
mData
,
","
);
std
::
cout
<<
std
::
endl
;
tensorB
.
close
();
if
(
do_verification
)
if
(
do_verification
)
{
{
auto
ref_gemm
=
ReferenceOpInstance
{};
auto
ref_gemm
=
ReferenceOpInstance
{};
...
@@ -334,6 +403,19 @@ int main(int argc, char* argv[])
...
@@ -334,6 +403,19 @@ int main(int argc, char* argv[])
ref_invoker
.
Run
(
ref_argument
);
ref_invoker
.
Run
(
ref_argument
);
tensorC
.
open
(
"tensor_C_contraction_host_results.txt"
);
LogRangeToFile
<
CDataType
>
(
tensorC
,
c_ms_ns_host_result
.
mData
,
","
);
LogRangeAsType
<
CDataType
>
(
std
::
cout
<<
"Tensor C_host elements:
\n
"
,
c_ms_ns_host_result
.
mData
,
","
);
std
::
cout
<<
std
::
endl
;
tensorC
.
close
();
tensorC
.
open
(
"tensor_C_contraction_device_results.txt"
);
LogRangeToFile
<
CDataType
>
(
tensorC_d
,
c_ms_ns_device_result
.
mData
,
","
);
LogRangeAsType
<
CDataType
>
(
std
::
cout
<<
"Tensor C_device elements:
\n
"
,
c_ms_ns_device_result
.
mData
,
","
);
std
::
cout
<<
std
::
endl
;
tensorC
.
close
();
return
ck
::
utils
::
check_err
(
c_ms_ns_device_result
.
mData
,
c_ms_ns_host_result
.
mData
)
?
0
:
1
;
return
ck
::
utils
::
check_err
(
c_ms_ns_device_result
.
mData
,
c_ms_ns_host_result
.
mData
)
?
0
:
1
;
}
}
...
...
library/include/ck/library/host_tensor/device.hpp
View file @
7fd0e649
...
@@ -32,7 +32,7 @@ inline void hip_check_error(hipError_t x)
...
@@ -32,7 +32,7 @@ inline void hip_check_error(hipError_t x)
struct
DeviceMem
struct
DeviceMem
{
{
DeviceMem
()
=
de
fault
;
DeviceMem
()
=
de
lete
;
DeviceMem
(
std
::
size_t
mem_size
);
DeviceMem
(
std
::
size_t
mem_size
);
void
*
GetDeviceBuffer
();
void
*
GetDeviceBuffer
();
std
::
size_t
GetBufferSize
();
std
::
size_t
GetBufferSize
();
...
...
library/include/ck/library/host_tensor/host_tensor.hpp
View file @
7fd0e649
...
@@ -109,7 +109,7 @@ struct HostTensorDescriptor
...
@@ -109,7 +109,7 @@ struct HostTensorDescriptor
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
HostTensorDescriptor
&
desc
);
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
HostTensorDescriptor
&
desc
);
private:
//
private:
std
::
vector
<
std
::
size_t
>
mLens
;
std
::
vector
<
std
::
size_t
>
mLens
;
std
::
vector
<
std
::
size_t
>
mStrides
;
std
::
vector
<
std
::
size_t
>
mStrides
;
};
};
...
...
library/include/ck/library/host_tensor/host_tensor_generator.hpp
View file @
7fd0e649
...
@@ -74,6 +74,7 @@ struct GeneratorTensor_2<ck::bhalf_t>
...
@@ -74,6 +74,7 @@ struct GeneratorTensor_2<ck::bhalf_t>
ck
::
bhalf_t
operator
()(
Is
...)
ck
::
bhalf_t
operator
()(
Is
...)
{
{
float
tmp
=
(
std
::
rand
()
%
(
max_value
-
min_value
))
+
min_value
;
float
tmp
=
(
std
::
rand
()
%
(
max_value
-
min_value
))
+
min_value
;
std
::
cout
<<
tmp
<<
","
;
return
ck
::
type_convert
<
ck
::
bhalf_t
>
(
tmp
);
return
ck
::
type_convert
<
ck
::
bhalf_t
>
(
tmp
);
}
}
};
};
...
@@ -123,6 +124,35 @@ struct GeneratorTensor_3<ck::bhalf_t>
...
@@ -123,6 +124,35 @@ struct GeneratorTensor_3<ck::bhalf_t>
}
}
};
};
/// Random-value generator matching the cuTENSOR samples' initialization
/// style: each call produces a value in roughly [-50, 50], computed as
/// (rand()/RAND_MAX - 0.5) * 100 and then cast to T.
///
/// NOTE(review): draws from the global std::rand() state, so sequences are
/// reproducible only if std::srand() is seeded identically beforehand.
template <typename T>
struct GeneratorTensor_cuTensor
{
    /// Index arguments are accepted for interface compatibility with the
    /// other GeneratorTensor_* functors but are ignored; every call simply
    /// yields the next pseudo-random sample.
    template <typename... Is>
    T operator()(Is...) const
    {
        // rand()/RAND_MAX is in [0, 1]; shift to [-0.5, 0.5], scale to [-50, 50].
        float tmp = ((float(std::rand())) / RAND_MAX - 0.5) * 100;
        return static_cast<T>(tmp);
    }
};
/// Specialization of GeneratorTensor_cuTensor for bhalf_t (bfloat16).
/// The sample is produced in float with the same formula as the primary
/// template — (rand()/RAND_MAX - 0.5) * 100, roughly [-50, 50] — and then
/// narrowed via ck::type_convert, since bhalf_t is not directly
/// constructible from float by a plain cast.
template <>
struct GeneratorTensor_cuTensor<ck::bhalf_t>
{
    /// Index arguments are accepted for interface compatibility with the
    /// other GeneratorTensor_* functors but are ignored.
    template <typename... Is>
    ck::bhalf_t operator()(Is...) const
    {
        // rand()/RAND_MAX is in [0, 1]; shift to [-0.5, 0.5], scale to [-50, 50].
        float tmp = ((float(std::rand())) / RAND_MAX - 0.5) * 100;
        return ck::type_convert<ck::bhalf_t>(tmp);
    }
};
struct
GeneratorTensor_Checkboard
struct
GeneratorTensor_Checkboard
{
{
template
<
typename
...
Ts
>
template
<
typename
...
Ts
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment