Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
7b9d9fc0
Commit
7b9d9fc0
authored
May 26, 2023
by
Astha Rai
Browse files
edited README, cleaned up code
parent
47cc9b7e
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
26 additions
and
112 deletions
+26
-112
python/README.md
python/README.md
+14
-0
python/normal/ck_types.py
python/normal/ck_types.py
+2
-1
python/normal/gemm_op.py
python/normal/gemm_op.py
+2
-22
python/shared/ck_types.py
python/shared/ck_types.py
+3
-1
python/shared/gemm_op.py
python/shared/gemm_op.py
+2
-21
python/shared/input.py
python/shared/input.py
+0
-1
python/shared/makefile_work.txt
python/shared/makefile_work.txt
+0
-35
python/shared/user.py
python/shared/user.py
+3
-31
No files found.
python/README.md
View file @
7b9d9fc0
...
@@ -4,6 +4,20 @@ There are 2 directories: shared and normal. The normal directory contains one in
...
@@ -4,6 +4,20 @@ There are 2 directories: shared and normal. The normal directory contains one in
generates multiple instances and compiles them into a shared library.
generates multiple instances and compiles them into a shared library.
## Normal
## Normal
To generate the cpp file and executable:
`python3 gemm_ex.py`
Run the executable (same argument layout as the CK examples: verification, initialization, number of times to run the kernel)
`./ex 0 1 5`
## Shared
## Shared
Generate all instances, make shared library and executable
`python3 driver.py`
Run the executable (same layout as CK examples)
`./example 0 1 5`
*
The executable needs the included main.cpp file, so be careful not to remove it when deleting the generated cpp files for the instances
The design for parts of this code was taken from Meta's AIT library
python/normal/ck_types.py
View file @
7b9d9fc0
# The structure for defining types is taken from Meta's AIT library
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
class
DataType
:
class
DataType
:
...
@@ -11,7 +12,7 @@ class TensorOperation:
...
@@ -11,7 +12,7 @@ class TensorOperation:
PassThrough
=
"ck::tensor_operation::element_wise::PassThrough"
PassThrough
=
"ck::tensor_operation::element_wise::PassThrough"
@
dataclass
@
dataclass
class
TensorDesc
:
#set up and import properly
class
TensorDesc
:
element
:
DataType
element
:
DataType
layout
:
Layout
layout
:
Layout
python/normal/gemm_op.py
View file @
7b9d9fc0
#take in input for gemm from user, send it to example template
# The structure for constructing this gemm op was taken from AIT's implementation of creating a gemm op
# Take in input for gemm from user, send it to example template
import
enum
import
enum
import
ck_types
import
ck_types
from
copy
import
deepcopy
from
copy
import
deepcopy
...
@@ -10,22 +11,6 @@ from ck_types import *
...
@@ -10,22 +11,6 @@ from ck_types import *
class
GemmType
():
class
GemmType
():
GemmDefault
=
"ck::tensor_operation::device::GemmSpecialization::Default"
GemmDefault
=
"ck::tensor_operation::device::GemmSpecialization::Default"
# class GemmSpecialization(enum.Enum):
# GemmDefault = auto()
# MNKPadding = auto()
# MNPadding = auto()
# MNOPadding = auto()
# MNKOPadding = auto()
# GemmSpecializationTag = {
# GemmSpecialization.GemmDefault: "ck::tensor_operation::device::GemmSpecialization::Default",
# GemmSpecialization.MNKPadding: "ck::tensor_operation::device::GemmSpecialization::MNKPadding",
# GemmSpecialization.MNPadding: "ck::tensor_operation::device::GemmSpecialization::MNPadding",
# GemmSpecialization.MNOPadding: "ck::tensor_operation::device::GemmSpecialization::MNOPadding",
# GemmSpecialization.MNKOPadding: "ck::tensor_operation::device::GemmSpecialization::MNKOPadding",
# }
@
dataclass
@
dataclass
class
TileDesc
:
class
TileDesc
:
block_size
:
int
block_size
:
int
...
@@ -70,7 +55,6 @@ class CBlockTransferDesc:
...
@@ -70,7 +55,6 @@ class CBlockTransferDesc:
def
__str__
(
self
)
->
str
:
def
__str__
(
self
)
->
str
:
args
=
deepcopy
(
self
.
__dict__
)
args
=
deepcopy
(
self
.
__dict__
)
#args["m_n_block_wave_per_xdl"] = [str(x) for x in self.m_n_block_wave_per_xdl]
@
dataclass
@
dataclass
...
@@ -91,7 +75,6 @@ class GemmOperation:
...
@@ -91,7 +75,6 @@ class GemmOperation:
def
__str__
(
self
)
->
str
:
def
__str__
(
self
)
->
str
:
io_name
=
"{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}"
.
format
(
io_name
=
"{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}"
.
format
(
#gemm_kind=library.GemmKindNames[self.operation_kind],
gemm_specialization
=
self
.
gemm_specialization
.
value
,
gemm_specialization
=
self
.
gemm_specialization
.
value
,
a_dtype
=
[
self
.
A
.
element
],
a_dtype
=
[
self
.
A
.
element
],
b_dtype
=
[
self
.
B
.
element
],
b_dtype
=
[
self
.
B
.
element
],
...
@@ -138,7 +121,4 @@ if __name__ == "__main__":
...
@@ -138,7 +121,4 @@ if __name__ == "__main__":
[
8
,
32
,
1
],
[
0
,
2
,
1
],
[
0
,
2
,
1
],
1
,
4
,
1
,
0
,
True
[
8
,
32
,
1
],
[
0
,
2
,
1
],
[
0
,
2
,
1
],
1
,
4
,
1
,
0
,
True
),
),
c_block_transfer
=
CBlockTransferDesc
(
1
,
1
,
[
1
,
32
,
1
,
8
],
8
),
c_block_transfer
=
CBlockTransferDesc
(
1
,
1
,
[
1
,
32
,
1
,
8
],
8
),
#ds_dtype=[DataType.f16],
)
)
print
(
GemmOp
.
a_elem_op
)
python/shared/ck_types.py
View file @
7b9d9fc0
# The structure for defining types is taken from Meta's AIT library
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
class
DataType
:
class
DataType
:
...
@@ -11,7 +13,7 @@ class TensorOperation:
...
@@ -11,7 +13,7 @@ class TensorOperation:
PassThrough
=
"ck::tensor_operation::element_wise::PassThrough"
PassThrough
=
"ck::tensor_operation::element_wise::PassThrough"
@
dataclass
@
dataclass
class
TensorDesc
:
#set up and import properly
class
TensorDesc
:
element
:
DataType
element
:
DataType
layout
:
Layout
layout
:
Layout
python/shared/gemm_op.py
View file @
7b9d9fc0
#take in input for gemm from user, send it to example template
# The structure for constructing this gemm op was taken from Meta's AIT implementation of creating a gemm op
# Take in input for gemm from user, send it to example template
import
enum
import
enum
import
ck_types
from
copy
import
deepcopy
from
copy
import
deepcopy
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
enum
import
auto
from
enum
import
auto
...
@@ -10,22 +10,6 @@ from ck_types import *
...
@@ -10,22 +10,6 @@ from ck_types import *
class
GemmType
():
class
GemmType
():
GemmDefault
=
"ck::tensor_operation::device::GemmSpecialization::Default"
GemmDefault
=
"ck::tensor_operation::device::GemmSpecialization::Default"
# class GemmSpecialization(enum.Enum):
# GemmDefault = auto()
# MNKPadding = auto()
# MNPadding = auto()
# MNOPadding = auto()
# MNKOPadding = auto()
# GemmSpecializationTag = {
# GemmSpecialization.GemmDefault: "ck::tensor_operation::device::GemmSpecialization::Default",
# GemmSpecialization.MNKPadding: "ck::tensor_operation::device::GemmSpecialization::MNKPadding",
# GemmSpecialization.MNPadding: "ck::tensor_operation::device::GemmSpecialization::MNPadding",
# GemmSpecialization.MNOPadding: "ck::tensor_operation::device::GemmSpecialization::MNOPadding",
# GemmSpecialization.MNKOPadding: "ck::tensor_operation::device::GemmSpecialization::MNKOPadding",
# }
@
dataclass
@
dataclass
class
TileDesc
:
class
TileDesc
:
block_size
:
int
block_size
:
int
...
@@ -70,7 +54,6 @@ class CBlockTransferDesc:
...
@@ -70,7 +54,6 @@ class CBlockTransferDesc:
def
__str__
(
self
)
->
str
:
def
__str__
(
self
)
->
str
:
args
=
deepcopy
(
self
.
__dict__
)
args
=
deepcopy
(
self
.
__dict__
)
#args["m_n_block_wave_per_xdl"] = [str(x) for x in self.m_n_block_wave_per_xdl]
@
dataclass
@
dataclass
...
@@ -91,7 +74,6 @@ class GemmOperation:
...
@@ -91,7 +74,6 @@ class GemmOperation:
def
__str__
(
self
)
->
str
:
def
__str__
(
self
)
->
str
:
io_name
=
"{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}"
.
format
(
io_name
=
"{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}"
.
format
(
#gemm_kind=library.GemmKindNames[self.operation_kind],
gemm_specialization
=
self
.
gemm_specialization
.
value
,
gemm_specialization
=
self
.
gemm_specialization
.
value
,
a_dtype
=
[
self
.
A
.
element
],
a_dtype
=
[
self
.
A
.
element
],
b_dtype
=
[
self
.
B
.
element
],
b_dtype
=
[
self
.
B
.
element
],
...
@@ -138,7 +120,6 @@ if __name__ == "__main__":
...
@@ -138,7 +120,6 @@ if __name__ == "__main__":
[
8
,
32
,
1
],
[
0
,
2
,
1
],
[
0
,
2
,
1
],
1
,
4
,
1
,
0
,
True
[
8
,
32
,
1
],
[
0
,
2
,
1
],
[
0
,
2
,
1
],
1
,
4
,
1
,
0
,
True
),
),
c_block_transfer
=
CBlockTransferDesc
(
1
,
1
,
[
1
,
32
,
1
,
8
],
8
),
c_block_transfer
=
CBlockTransferDesc
(
1
,
1
,
[
1
,
32
,
1
,
8
],
8
),
#ds_dtype=[DataType.f16],
)
)
print
(
GemmOp
.
a_elem_op
)
print
(
GemmOp
.
a_elem_op
)
python/shared/input.py
deleted
100644 → 0
View file @
47cc9b7e
#take in input for gemm from user, send it to example template
\ No newline at end of file
python/shared/makefile_work.txt
deleted
100644 → 0
View file @
47cc9b7e
CFLAGS=-I ~/workspace/composable_kernel/include -I /opt/workspace/rocm-5.1.1/hip/include -I ~/workspace/composable_kernel/include/ -I ~/workspace/composable_kernel/include/ck/ -I ~/workspace/composable_kernel/example/01_gemm/ -I ~/workspace/composable_kernel/library/include/ -I ~/workspace/composable_kernel/library/src/utility/ -I ~/workspace/composable_kernel/include/ck/problem_transform/ -I ~/workspace/composable_kernel/include/ck/tensor/ -I ~/workspace/composable_kernel/include/ck/tensor_description/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/block/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/device/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/device/impl/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/element/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/grid/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/thread/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/warp/ -I ~/workspace/composable_kernel/include/ck/host_utility -I /external/include/half/ -I ~/workspace/composable_kernel/library/include/ck/library/host/ -I ~/workspace/composable_kernel/library/include/ck/library/host_tensor/ -I ~/workspace/composable_kernel/library/include/ck/library/obselete_driver_offline/ -I ~/workspace/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/ -I ~/workspace/composable_kernel/library/include/ck/library/reference_tensor_operation/gpu/ -I ~/workspace/composable_kernel/library/include/ck/library/tensor_operation_instance/ -I ~/workspace/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/" + "reduce/ -I ~/workspace/composable_kernel/library/include/ck/library/tensor_op/ -I ~/workspace/composable_kernel/library/include/ck/library/utility/ -I ~/workspace/composable_kernel/profiler/include/
CXXFLAGS = -std=c++17
device_memory.o: ../../../../../library/src/utility/device_memory.cpp
hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) $(CFLAGS) -c ../../../../../library/src/utility/device_memory.cpp
host_tensor.o: ../../../../../library/src/utility/host_tensor.cpp
hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) $(CFLAGS) -c ../../../../../library/src/utility/host_tensor.cpp
main.o: main.cpp
hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) -w $(CFLAGS) -L/opt/rocm-5.3.0/rocrand -lrocrand -x hip -c main.cpp
obj_files = 256_128_128_8_2.o 256_128_128_16_2.o 128_32_128_8_2.o 128_64_32_8_2.o 128_64_128_8_2.o 128_128_32_8_2.o 128_128_64_8_2.o 256_64_128_8_2.o 256_128_64_8_2.o
%.o : %.cpp
hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) -w $(CFLAGS) -L/opt/rocm-5.3.0/rocrand -lrocrand -x hip -c $<
done: libtest.so
cp libtest.so /lib
main: main.o device_memory.o host_tensor.o $(obj_files)
hipcc $(CXXFLAGS) $(CFLAGS) main.o host_tensor.o device_memory.o $(obj_files) -o main
libtest.so: $(obj_files) host_tensor.o device_memory.o
hipcc -shared $(CXXFLAGS) $(CFLAGS) -o $@ $(obj_files) host_tensor.o device_memory.o
all: done main.o
hipcc $(CXXFLAGS) $(CFLAGS) -L/root/workspace/composable_kernel/python/ait_impl/generation/ex/shared -l test main.o -o example
clean:
rm -f *.o libtest.so example
\ No newline at end of file
python/shared/user.py
View file @
7b9d9fc0
# The structure for creating a list of instances for an op
# was taken from Meta's AIT library
import
gemm_op
as
gemm
import
gemm_op
as
gemm
import
enum
import
enum
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
...
@@ -6,7 +9,6 @@ import ck_types
...
@@ -6,7 +9,6 @@ import ck_types
from
ck_types
import
*
from
ck_types
import
*
def
CreateGemmOperator
():
def
CreateGemmOperator
():
#operation_kind = library.GemmKind.Gemm
a_element_desc
=
TensorDesc
(
a_element_desc
=
TensorDesc
(
DataType
.
f16
,
Layout
.
ColumnMajor
DataType
.
f16
,
Layout
.
ColumnMajor
)
)
...
@@ -70,32 +72,6 @@ def CreateGemmOperator():
...
@@ -70,32 +72,6 @@ def CreateGemmOperator():
gemm
.
CBlockTransferDesc
(
"S<0, 1, 2, 3, 4, 5>"
,
5
,
4
),
gemm
.
CBlockTransferDesc
(
"S<0, 1, 2, 3, 4, 5>"
,
5
,
4
),
gemm
.
CBlockTransferDesc
(
"S<0, 1, 2, 3, 4, 5>"
,
5
,
2
),
gemm
.
CBlockTransferDesc
(
"S<0, 1, 2, 3, 4, 5>"
,
5
,
2
),
]
]
#a_block_descriptions = b_block_descriptions
#c_block_descriptions = []
# AIT logic, adapt later
# for t in tile_descriptions:
# a_block_transfer = -1
# c_block_transfer = -1
# if t.block_size == 256:
# a_block_transfer = [4, 64, 1]
# c_block_transfer = gemm.CBlockTransferDesc(1, 1, [1, 32, 1, 8], 8)
# if t.block_size == 128:
# a_block_transfer = [4, 32, 1]
# if t.n_per_block == 128:
# c_block_transfer = gemm.CBlockTransferDesc(1, 1, [1, 16, 1, 8], 8)
# if t.n_per_block == 64:
# c_block_transfer = gemm.CBlockTransferDesc(1, 1, [1, 32, 1, 4], 8)
# assert (
# a_block_transfer != -1
# and c_block_transfer != -1
# and "Cannot determine block_transfer_size with block_size "
# + str(t.block_size)
# )
# a_block_descriptions.append(
# gemm.BlockTransferDesc(a_block_transfer, [1, 0, 2], [1, 0, 2], 2, 8, 8, 1)
# )
# c_block_descriptions.append(c_block_transfer)
gemm_specialization
=
[
gemm_specialization
=
[
gemm
.
GemmType
.
GemmDefault
gemm
.
GemmType
.
GemmDefault
...
@@ -109,7 +85,6 @@ def CreateGemmOperator():
...
@@ -109,7 +85,6 @@ def CreateGemmOperator():
c_block_descriptions
,
c_block_descriptions
,
):
):
new_operation
=
gemm
.
GemmOperation
(
new_operation
=
gemm
.
GemmOperation
(
#operation_kind=operation_kind,
A
=
a_element_desc
,
A
=
a_element_desc
,
B
=
b_element_desc
,
B
=
b_element_desc
,
C
=
c_element_desc
,
C
=
c_element_desc
,
...
@@ -122,9 +97,6 @@ def CreateGemmOperator():
...
@@ -122,9 +97,6 @@ def CreateGemmOperator():
b_block_transfer
=
b_block_desc
,
b_block_transfer
=
b_block_desc
,
c_block_transfer
=
c_block_desc
,
c_block_transfer
=
c_block_desc
,
)
)
#manifest.append(new_operation)
operations
.
append
(
new_operation
)
operations
.
append
(
new_operation
)
return
operations
return
operations
print
(
operations
[
0
].
tile_desc
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment