Commit 7b9d9fc0 authored by Astha Rai's avatar Astha Rai
Browse files

edited README, cleaned up code

parent 47cc9b7e
...@@ -4,6 +4,20 @@ There are 2 directories: shared and normal. The normal directory contains one in ...@@ -4,6 +4,20 @@ There are 2 directories: shared and normal. The normal directory contains one in
generates multiple instances and compiles them into a shared library. generates multiple instances and compiles them into a shared library.
## Normal ## Normal
To generate the cpp file and executable:
`python3 gemm_ex.py`
Run the executable (arguments follow the same layout as the CK examples: verification, initialization, number of times to run the kernel)
`./ex 0 1 5`
## Shared ## Shared
Generate all instances, make shared library and executable
`python3 driver.py`
Run the executable (same layout as CK examples)
`./example 0 1 5`
* The main.cpp file needed to build the executable is included in this directory, so be careful not to delete it when removing the generated cpp files for the instances
The design for parts of this code was taken from Meta's AIT library
# The structure for defining types is taken from Meta's AIT library
from dataclasses import dataclass from dataclasses import dataclass
class DataType: class DataType:
...@@ -11,7 +12,7 @@ class TensorOperation: ...@@ -11,7 +12,7 @@ class TensorOperation:
PassThrough = "ck::tensor_operation::element_wise::PassThrough" PassThrough = "ck::tensor_operation::element_wise::PassThrough"
@dataclass @dataclass
class TensorDesc: #set up and import properly class TensorDesc:
element: DataType element: DataType
layout: Layout layout: Layout
#take in input for gemm from user, send it to example template # The structure for constructing this gemm op was taken from AIT's implementation of creating a gemm op
# Take in input for gemm from user, send it to example template
import enum import enum
import ck_types import ck_types
from copy import deepcopy from copy import deepcopy
...@@ -10,22 +11,6 @@ from ck_types import * ...@@ -10,22 +11,6 @@ from ck_types import *
class GemmType(): class GemmType():
GemmDefault = "ck::tensor_operation::device::GemmSpecialization::Default" GemmDefault = "ck::tensor_operation::device::GemmSpecialization::Default"
# class GemmSpecialization(enum.Enum):
# GemmDefault = auto()
# MNKPadding = auto()
# MNPadding = auto()
# MNOPadding = auto()
# MNKOPadding = auto()
# GemmSpecializationTag = {
# GemmSpecialization.GemmDefault: "ck::tensor_operation::device::GemmSpecialization::Default",
# GemmSpecialization.MNKPadding: "ck::tensor_operation::device::GemmSpecialization::MNKPadding",
# GemmSpecialization.MNPadding: "ck::tensor_operation::device::GemmSpecialization::MNPadding",
# GemmSpecialization.MNOPadding: "ck::tensor_operation::device::GemmSpecialization::MNOPadding",
# GemmSpecialization.MNKOPadding: "ck::tensor_operation::device::GemmSpecialization::MNKOPadding",
# }
@dataclass @dataclass
class TileDesc: class TileDesc:
block_size: int block_size: int
...@@ -70,7 +55,6 @@ class CBlockTransferDesc: ...@@ -70,7 +55,6 @@ class CBlockTransferDesc:
def __str__(self) -> str: def __str__(self) -> str:
args = deepcopy(self.__dict__) args = deepcopy(self.__dict__)
#args["m_n_block_wave_per_xdl"] = [str(x) for x in self.m_n_block_wave_per_xdl]
@dataclass @dataclass
...@@ -91,7 +75,6 @@ class GemmOperation: ...@@ -91,7 +75,6 @@ class GemmOperation:
def __str__(self) -> str: def __str__(self) -> str:
io_name = "{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}".format( io_name = "{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}".format(
#gemm_kind=library.GemmKindNames[self.operation_kind],
gemm_specialization=self.gemm_specialization.value, gemm_specialization=self.gemm_specialization.value,
a_dtype=[self.A.element], a_dtype=[self.A.element],
b_dtype=[self.B.element], b_dtype=[self.B.element],
...@@ -138,7 +121,4 @@ if __name__ == "__main__": ...@@ -138,7 +121,4 @@ if __name__ == "__main__":
[8, 32, 1], [0, 2, 1], [0, 2, 1], 1, 4, 1, 0, True [8, 32, 1], [0, 2, 1], [0, 2, 1], 1, 4, 1, 0, True
), ),
c_block_transfer=CBlockTransferDesc(1, 1, [1, 32, 1, 8], 8), c_block_transfer=CBlockTransferDesc(1, 1, [1, 32, 1, 8], 8),
#ds_dtype=[DataType.f16],
) )
print(GemmOp.a_elem_op)
# The structure for defining types is taken from Meta's AIT library
from dataclasses import dataclass from dataclasses import dataclass
class DataType: class DataType:
...@@ -11,7 +13,7 @@ class TensorOperation: ...@@ -11,7 +13,7 @@ class TensorOperation:
PassThrough = "ck::tensor_operation::element_wise::PassThrough" PassThrough = "ck::tensor_operation::element_wise::PassThrough"
@dataclass @dataclass
class TensorDesc: #set up and import properly class TensorDesc:
element: DataType element: DataType
layout: Layout layout: Layout
#take in input for gemm from user, send it to example template # The structure for constructing this gemm op was taken from Meta's AIT's implementation of creating a gemm op
# Take in input for gemm from user, send it to example template
import enum import enum
import ck_types
from copy import deepcopy from copy import deepcopy
from dataclasses import dataclass from dataclasses import dataclass
from enum import auto from enum import auto
...@@ -10,22 +10,6 @@ from ck_types import * ...@@ -10,22 +10,6 @@ from ck_types import *
class GemmType(): class GemmType():
GemmDefault = "ck::tensor_operation::device::GemmSpecialization::Default" GemmDefault = "ck::tensor_operation::device::GemmSpecialization::Default"
# class GemmSpecialization(enum.Enum):
# GemmDefault = auto()
# MNKPadding = auto()
# MNPadding = auto()
# MNOPadding = auto()
# MNKOPadding = auto()
# GemmSpecializationTag = {
# GemmSpecialization.GemmDefault: "ck::tensor_operation::device::GemmSpecialization::Default",
# GemmSpecialization.MNKPadding: "ck::tensor_operation::device::GemmSpecialization::MNKPadding",
# GemmSpecialization.MNPadding: "ck::tensor_operation::device::GemmSpecialization::MNPadding",
# GemmSpecialization.MNOPadding: "ck::tensor_operation::device::GemmSpecialization::MNOPadding",
# GemmSpecialization.MNKOPadding: "ck::tensor_operation::device::GemmSpecialization::MNKOPadding",
# }
@dataclass @dataclass
class TileDesc: class TileDesc:
block_size: int block_size: int
...@@ -70,7 +54,6 @@ class CBlockTransferDesc: ...@@ -70,7 +54,6 @@ class CBlockTransferDesc:
def __str__(self) -> str: def __str__(self) -> str:
args = deepcopy(self.__dict__) args = deepcopy(self.__dict__)
#args["m_n_block_wave_per_xdl"] = [str(x) for x in self.m_n_block_wave_per_xdl]
@dataclass @dataclass
...@@ -91,7 +74,6 @@ class GemmOperation: ...@@ -91,7 +74,6 @@ class GemmOperation:
def __str__(self) -> str: def __str__(self) -> str:
io_name = "{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}".format( io_name = "{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}".format(
#gemm_kind=library.GemmKindNames[self.operation_kind],
gemm_specialization=self.gemm_specialization.value, gemm_specialization=self.gemm_specialization.value,
a_dtype=[self.A.element], a_dtype=[self.A.element],
b_dtype=[self.B.element], b_dtype=[self.B.element],
...@@ -138,7 +120,6 @@ if __name__ == "__main__": ...@@ -138,7 +120,6 @@ if __name__ == "__main__":
[8, 32, 1], [0, 2, 1], [0, 2, 1], 1, 4, 1, 0, True [8, 32, 1], [0, 2, 1], [0, 2, 1], 1, 4, 1, 0, True
), ),
c_block_transfer=CBlockTransferDesc(1, 1, [1, 32, 1, 8], 8), c_block_transfer=CBlockTransferDesc(1, 1, [1, 32, 1, 8], 8),
#ds_dtype=[DataType.f16],
) )
print(GemmOp.a_elem_op) print(GemmOp.a_elem_op)
#take in input for gemm from user, send it to example template
\ No newline at end of file
# Builds the generated GEMM instances into a shared library (libtest.so) and
# links the `example` executable (and, optionally, a statically linked `main`)
# against it. All compilation goes through hipcc.

# Targets that do not produce a file of the same name.
.PHONY: all done clean

# Include search paths: Composable Kernel headers, library/profiler headers,
# the HIP headers and the external half-precision headers.
CFLAGS = \
	-I ~/workspace/composable_kernel/include \
	-I /opt/workspace/rocm-5.1.1/hip/include \
	-I ~/workspace/composable_kernel/include/ \
	-I ~/workspace/composable_kernel/include/ck/ \
	-I ~/workspace/composable_kernel/example/01_gemm/ \
	-I ~/workspace/composable_kernel/library/include/ \
	-I ~/workspace/composable_kernel/library/src/utility/ \
	-I ~/workspace/composable_kernel/include/ck/problem_transform/ \
	-I ~/workspace/composable_kernel/include/ck/tensor/ \
	-I ~/workspace/composable_kernel/include/ck/tensor_description/ \
	-I ~/workspace/composable_kernel/include/ck/tensor_operation/ \
	-I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/block/ \
	-I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/device/ \
	-I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/device/impl/ \
	-I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/element/ \
	-I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/grid/ \
	-I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/thread/ \
	-I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/warp/ \
	-I ~/workspace/composable_kernel/include/ck/host_utility \
	-I /external/include/half/ \
	-I ~/workspace/composable_kernel/library/include/ck/library/host/ \
	-I ~/workspace/composable_kernel/library/include/ck/library/host_tensor/ \
	-I ~/workspace/composable_kernel/library/include/ck/library/obselete_driver_offline/ \
	-I ~/workspace/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/ \
	-I ~/workspace/composable_kernel/library/include/ck/library/reference_tensor_operation/gpu/ \
	-I ~/workspace/composable_kernel/library/include/ck/library/tensor_operation_instance/ \
	-I ~/workspace/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/ \
	-I ~/workspace/composable_kernel/library/include/ck/library/tensor_op/ \
	-I ~/workspace/composable_kernel/library/include/ck/library/utility/ \
	-I ~/workspace/composable_kernel/profiler/include/

CXXFLAGS = -std=c++17

# CK utility sources, compiled as position-independent code so they can be
# placed in the shared library.
device_memory.o: ../../../../../library/src/utility/device_memory.cpp
	hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) $(CFLAGS) -c ../../../../../library/src/utility/device_memory.cpp

host_tensor.o: ../../../../../library/src/utility/host_tensor.cpp
	hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) $(CFLAGS) -c ../../../../../library/src/utility/host_tensor.cpp

# Driver program; `-x hip` makes hipcc treat the .cpp file as HIP source.
main.o: main.cpp
	hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) -w $(CFLAGS) -L/opt/rocm-5.3.0/rocrand -lrocrand -x hip -c main.cpp

# One object file per generated GEMM instance.
obj_files = 256_128_128_8_2.o 256_128_128_16_2.o 128_32_128_8_2.o 128_64_32_8_2.o 128_64_128_8_2.o 128_128_32_8_2.o 128_128_64_8_2.o 256_64_128_8_2.o 256_128_64_8_2.o

# Generic rule used for the instance sources listed in obj_files.
%.o : %.cpp
	hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) -w $(CFLAGS) -L/opt/rocm-5.3.0/rocrand -lrocrand -x hip -c $<

# Build the shared library and copy it into /lib.
done: libtest.so
	cp libtest.so /lib

# Executable that links every object file directly (no shared library).
main: main.o device_memory.o host_tensor.o $(obj_files)
	hipcc $(CXXFLAGS) $(CFLAGS) main.o host_tensor.o device_memory.o $(obj_files) -o main

# Shared library containing all instances plus the CK utility objects.
libtest.so: $(obj_files) host_tensor.o device_memory.o
	hipcc -shared $(CXXFLAGS) $(CFLAGS) -o $@ $(obj_files) host_tensor.o device_memory.o

# Build and install the library, then link `example` against it (-l test).
all: done main.o
	hipcc $(CXXFLAGS) $(CFLAGS) -L/root/workspace/composable_kernel/python/ait_impl/generation/ex/shared -l test main.o -o example

# Remove build products.
clean:
	rm -f *.o libtest.so example
\ No newline at end of file
# The structure for creating a list of instances for an op
# was taken from Meta's AIT library
import gemm_op as gemm import gemm_op as gemm
import enum import enum
from dataclasses import dataclass from dataclasses import dataclass
...@@ -6,7 +9,6 @@ import ck_types ...@@ -6,7 +9,6 @@ import ck_types
from ck_types import * from ck_types import *
def CreateGemmOperator(): def CreateGemmOperator():
#operation_kind = library.GemmKind.Gemm
a_element_desc = TensorDesc( a_element_desc = TensorDesc(
DataType.f16, Layout.ColumnMajor DataType.f16, Layout.ColumnMajor
) )
...@@ -70,32 +72,6 @@ def CreateGemmOperator(): ...@@ -70,32 +72,6 @@ def CreateGemmOperator():
gemm.CBlockTransferDesc("S<0, 1, 2, 3, 4, 5>", 5, 4), gemm.CBlockTransferDesc("S<0, 1, 2, 3, 4, 5>", 5, 4),
gemm.CBlockTransferDesc("S<0, 1, 2, 3, 4, 5>", 5, 2), gemm.CBlockTransferDesc("S<0, 1, 2, 3, 4, 5>", 5, 2),
] ]
#a_block_descriptions = b_block_descriptions
#c_block_descriptions = []
# AIT logic, adapt later
# for t in tile_descriptions:
# a_block_transfer = -1
# c_block_transfer = -1
# if t.block_size == 256:
# a_block_transfer = [4, 64, 1]
# c_block_transfer = gemm.CBlockTransferDesc(1, 1, [1, 32, 1, 8], 8)
# if t.block_size == 128:
# a_block_transfer = [4, 32, 1]
# if t.n_per_block == 128:
# c_block_transfer = gemm.CBlockTransferDesc(1, 1, [1, 16, 1, 8], 8)
# if t.n_per_block == 64:
# c_block_transfer = gemm.CBlockTransferDesc(1, 1, [1, 32, 1, 4], 8)
# assert (
# a_block_transfer != -1
# and c_block_transfer != -1
# and "Cannot determine block_transfer_size with block_size "
# + str(t.block_size)
# )
# a_block_descriptions.append(
# gemm.BlockTransferDesc(a_block_transfer, [1, 0, 2], [1, 0, 2], 2, 8, 8, 1)
# )
# c_block_descriptions.append(c_block_transfer)
gemm_specialization = [ gemm_specialization = [
gemm.GemmType.GemmDefault gemm.GemmType.GemmDefault
...@@ -109,7 +85,6 @@ def CreateGemmOperator(): ...@@ -109,7 +85,6 @@ def CreateGemmOperator():
c_block_descriptions, c_block_descriptions,
): ):
new_operation = gemm.GemmOperation( new_operation = gemm.GemmOperation(
#operation_kind=operation_kind,
A=a_element_desc, A=a_element_desc,
B=b_element_desc, B=b_element_desc,
C=c_element_desc, C=c_element_desc,
...@@ -122,9 +97,6 @@ def CreateGemmOperator(): ...@@ -122,9 +97,6 @@ def CreateGemmOperator():
b_block_transfer=b_block_desc, b_block_transfer=b_block_desc,
c_block_transfer=c_block_desc, c_block_transfer=c_block_desc,
) )
#manifest.append(new_operation)
operations.append(new_operation) operations.append(new_operation)
return operations return operations
print (operations[0].tile_desc)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment