edited README, cleaned up code

7b9d9fc0 · Astha Rai · 47cc9b7e · 7b9d9fc0 · 7b9d9fc0 · 7b9d9fc0
Commit 7b9d9fc0 authored May 26, 2023 by Astha Rai
8 changed files
--- a/python/README.md
+++ b/python/README.md
@@ -4,6 +4,20 @@ There are 2 directories: shared and normal. The normal directory contains one in
 generates multiple instances and compiles them into a shared library.
 ## Normal
+To generate the cpp file and executable: 
+`python3 gemm_ex.py`
+Run the executable (arguments follow the same layout as the CK examples: verification, initialization, number of times to run the kernel)
+`./ex 0 1 5`
 ## Shared
+Generate all instances, then build the shared library and the executable
+`python3 driver.py`
+Run the executable (same layout as CK examples)
+`./example 0 1 5`
+* A main.cpp file required to build the executable is included, so take care not to delete it when removing the generated cpp files for the instances
+The design for parts of this code was taken from Meta's AIT library
--- a/python/normal/ck_types.py
+++ b/python/normal/ck_types.py
+# The structure for defining types is taken from Meta's AIT library
 from dataclasses import dataclass
 class DataType:
@@ -11,7 +12,7 @@ class TensorOperation:
    PassThrough = "ck::tensor_operation::element_wise::PassThrough"
 @dataclass
-class TensorDesc: #set up and import properly
+class TensorDesc:
    element: DataType
    layout: Layout
--- a/python/normal/gemm_op.py
+++ b/python/normal/gemm_op.py
-#take in input for gemm from user, send it to example template
+# The structure for constructing this gemm op was taken from AIT's implementation of creating a gemm op
+# Take in input for gemm from user, send it to example template
 import enum
 import ck_types
 from copy import deepcopy
@@ -10,22 +11,6 @@ from ck_types import *
 class GemmType():
    GemmDefault = "ck::tensor_operation::device::GemmSpecialization::Default"
-# class GemmSpecialization(enum.Enum):
-#     GemmDefault = auto()
-#     MNKPadding = auto()
-#     MNPadding = auto()
-#     MNOPadding = auto()
-#     MNKOPadding = auto()
-# GemmSpecializationTag = {
-#     GemmSpecialization.GemmDefault: "ck::tensor_operation::device::GemmSpecialization::Default",
-#     GemmSpecialization.MNKPadding: "ck::tensor_operation::device::GemmSpecialization::MNKPadding",
-#     GemmSpecialization.MNPadding: "ck::tensor_operation::device::GemmSpecialization::MNPadding",
-#     GemmSpecialization.MNOPadding: "ck::tensor_operation::device::GemmSpecialization::MNOPadding",
-#     GemmSpecialization.MNKOPadding: "ck::tensor_operation::device::GemmSpecialization::MNKOPadding",
-# }
 @dataclass
 class TileDesc:
    block_size: int
@@ -70,7 +55,6 @@ class CBlockTransferDesc:
    def __str__(self) -> str:
        args = deepcopy(self.__dict__)
-        #args["m_n_block_wave_per_xdl"] = [str(x) for x in self.m_n_block_wave_per_xdl]
 @dataclass
@@ -91,7 +75,6 @@ class GemmOperation:
    def __str__(self) -> str:
        io_name = "{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}".format(
-            #gemm_kind=library.GemmKindNames[self.operation_kind],
            gemm_specialization=self.gemm_specialization.value,
            a_dtype=[self.A.element],
            b_dtype=[self.B.element],
@@ -138,7 +121,4 @@ if __name__ == "__main__":
            [8, 32, 1], [0, 2, 1], [0, 2, 1], 1, 4, 1, 0, True
        ),
        c_block_transfer=CBlockTransferDesc(1, 1, [1, 32, 1, 8], 8),
-        #ds_dtype=[DataType.f16],
    )
-    print(GemmOp.a_elem_op)
--- a/python/shared/ck_types.py
+++ b/python/shared/ck_types.py
+# The structure for defining types is taken from Meta's AIT library
 from dataclasses import dataclass
 class DataType:
@@ -11,7 +13,7 @@ class TensorOperation:
    PassThrough = "ck::tensor_operation::element_wise::PassThrough"
 @dataclass
-class TensorDesc: #set up and import properly
+class TensorDesc:
    element: DataType
    layout: Layout
--- a/python/shared/gemm_op.py
+++ b/python/shared/gemm_op.py
-#take in input for gemm from user, send it to example template
+# The structure for constructing this gemm op was taken from Meta's AIT implementation of creating a gemm op
+# Take in input for gemm from user, send it to example template
 import enum
-import ck_types
 from copy import deepcopy
 from dataclasses import dataclass
 from enum import auto
@@ -10,22 +10,6 @@ from ck_types import *
 class GemmType():
    GemmDefault = "ck::tensor_operation::device::GemmSpecialization::Default"
-# class GemmSpecialization(enum.Enum):
-#     GemmDefault = auto()
-#     MNKPadding = auto()
-#     MNPadding = auto()
-#     MNOPadding = auto()
-#     MNKOPadding = auto()
-# GemmSpecializationTag = {
-#     GemmSpecialization.GemmDefault: "ck::tensor_operation::device::GemmSpecialization::Default",
-#     GemmSpecialization.MNKPadding: "ck::tensor_operation::device::GemmSpecialization::MNKPadding",
-#     GemmSpecialization.MNPadding: "ck::tensor_operation::device::GemmSpecialization::MNPadding",
-#     GemmSpecialization.MNOPadding: "ck::tensor_operation::device::GemmSpecialization::MNOPadding",
-#     GemmSpecialization.MNKOPadding: "ck::tensor_operation::device::GemmSpecialization::MNKOPadding",
-# }
 @dataclass
 class TileDesc:
    block_size: int
@@ -70,7 +54,6 @@ class CBlockTransferDesc:
    def __str__(self) -> str:
        args = deepcopy(self.__dict__)
-        #args["m_n_block_wave_per_xdl"] = [str(x) for x in self.m_n_block_wave_per_xdl]
 @dataclass
@@ -91,7 +74,6 @@ class GemmOperation:
    def __str__(self) -> str:
        io_name = "{gemm_kind}_{gemm_specialization}_{a_dtype}{b_dtype}{c_dtype}_{a_layout}{b_layout}{c_layout}".format(
-            #gemm_kind=library.GemmKindNames[self.operation_kind],
            gemm_specialization=self.gemm_specialization.value,
            a_dtype=[self.A.element],
            b_dtype=[self.B.element],
@@ -138,7 +120,6 @@ if __name__ == "__main__":
            [8, 32, 1], [0, 2, 1], [0, 2, 1], 1, 4, 1, 0, True
        ),
        c_block_transfer=CBlockTransferDesc(1, 1, [1, 32, 1, 8], 8),
-        #ds_dtype=[DataType.f16],
    )
    print(GemmOp.a_elem_op)
--- a/python/shared/input.py
+++ b/python/shared/input.py
-#take in input for gemm from user, send it to example template
\ No newline at end of file
--- a/python/shared/makefile_work.txt
+++ b/python/shared/makefile_work.txt
-CFLAGS=-I ~/workspace/composable_kernel/include -I /opt/workspace/rocm-5.1.1/hip/include -I ~/workspace/composable_kernel/include/ -I ~/workspace/composable_kernel/include/ck/ -I ~/workspace/composable_kernel/example/01_gemm/ -I ~/workspace/composable_kernel/library/include/  -I ~/workspace/composable_kernel/library/src/utility/ -I ~/workspace/composable_kernel/include/ck/problem_transform/ -I ~/workspace/composable_kernel/include/ck/tensor/ -I ~/workspace/composable_kernel/include/ck/tensor_description/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/block/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/device/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/device/impl/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/element/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/grid/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/thread/ -I ~/workspace/composable_kernel/include/ck/tensor_operation/gpu/warp/ -I ~/workspace/composable_kernel/include/ck/host_utility -I /external/include/half/ -I ~/workspace/composable_kernel/library/include/ck/library/host/ -I ~/workspace/composable_kernel/library/include/ck/library/host_tensor/ -I ~/workspace/composable_kernel/library/include/ck/library/obselete_driver_offline/ -I ~/workspace/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/ -I ~/workspace/composable_kernel/library/include/ck/library/reference_tensor_operation/gpu/ -I ~/workspace/composable_kernel/library/include/ck/library/tensor_operation_instance/ -I ~/workspace/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/" + "reduce/ -I ~/workspace/composable_kernel/library/include/ck/library/tensor_op/ -I ~/workspace/composable_kernel/library/include/ck/library/utility/ -I ~/workspace/composable_kernel/profiler/include/
-CXXFLAGS = -std=c++17
-device_memory.o: ../../../../../library/src/utility/device_memory.cpp
-        hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) $(CFLAGS) -c ../../../../../library/src/utility/device_memory.cpp
-host_tensor.o: ../../../../../library/src/utility/host_tensor.cpp
-        hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) $(CFLAGS) -c ../../../../../library/src/utility/host_tensor.cpp
-main.o: main.cpp
-        hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) -w $(CFLAGS) -L/opt/rocm-5.3.0/rocrand -lrocrand -x hip -c  main.cpp
-obj_files = 256_128_128_8_2.o 256_128_128_16_2.o 128_32_128_8_2.o 128_64_32_8_2.o 128_64_128_8_2.o 128_128_32_8_2.o 128_128_64_8_2.o 256_64_128_8_2.o 256_128_64_8_2.o
-%.o : %.cpp
-        hipcc -fPIC -fvisibility=hidden $(CXXFLAGS) -w $(CFLAGS) -L/opt/rocm-5.3.0/rocrand -lrocrand -x hip -c $<
-done: libtest.so
-        cp libtest.so /lib
-main: main.o device_memory.o host_tensor.o $(obj_files)
-        hipcc $(CXXFLAGS) $(CFLAGS) main.o host_tensor.o device_memory.o $(obj_files) -o main
-libtest.so: $(obj_files) host_tensor.o device_memory.o
-        hipcc -shared $(CXXFLAGS) $(CFLAGS) -o $@ $(obj_files) host_tensor.o device_memory.o
-all: done main.o
-        hipcc $(CXXFLAGS) $(CFLAGS) -L/root/workspace/composable_kernel/python/ait_impl/generation/ex/shared -l test main.o -o example
-clean:
-        rm -f *.o libtest.so example
\ No newline at end of file
--- a/python/shared/user.py
+++ b/python/shared/user.py
+# The structure for creating a list of instances for an op 
+# was taken from Meta's AIT library 
 import gemm_op as gemm
 import enum
 from dataclasses import dataclass
@@ -6,7 +9,6 @@ import ck_types
 from ck_types import *
 def CreateGemmOperator():
-    #operation_kind = library.GemmKind.Gemm
    a_element_desc = TensorDesc(
       DataType.f16, Layout.ColumnMajor
    )
@@ -70,32 +72,6 @@ def CreateGemmOperator():
        gemm.CBlockTransferDesc("S<0, 1, 2, 3, 4, 5>", 5, 4),
        gemm.CBlockTransferDesc("S<0, 1, 2, 3, 4, 5>", 5, 2),
    ]
-    #a_block_descriptions = b_block_descriptions
-    #c_block_descriptions = []
-    # AIT logic, adapt later
-    # for t in tile_descriptions:
-    #     a_block_transfer = -1
-    #     c_block_transfer = -1
-    #     if t.block_size == 256:
-    #         a_block_transfer = [4, 64, 1]
-    #         c_block_transfer = gemm.CBlockTransferDesc(1, 1, [1, 32, 1, 8], 8)
-    #     if t.block_size == 128:
-    #         a_block_transfer = [4, 32, 1]
-    #         if t.n_per_block == 128:
-    #             c_block_transfer = gemm.CBlockTransferDesc(1, 1, [1, 16, 1, 8], 8)
-    #         if t.n_per_block == 64:
-    #             c_block_transfer = gemm.CBlockTransferDesc(1, 1, [1, 32, 1, 4], 8)
-    #     assert (
-    #         a_block_transfer != -1
-    #         and c_block_transfer != -1
-    #         and "Cannot determine block_transfer_size with block_size "
-    #         + str(t.block_size)
-    #     )
-    #     a_block_descriptions.append(
-    #         gemm.BlockTransferDesc(a_block_transfer, [1, 0, 2], [1, 0, 2], 2, 8, 8, 1)
-    #     )
-    #     c_block_descriptions.append(c_block_transfer)
    gemm_specialization = [
        gemm.GemmType.GemmDefault
@@ -109,7 +85,6 @@ def CreateGemmOperator():
            c_block_descriptions,
        ):
            new_operation = gemm.GemmOperation(
-                #operation_kind=operation_kind,
                A=a_element_desc,
                B=b_element_desc,
                C=c_element_desc,
@@ -122,9 +97,6 @@ def CreateGemmOperator():
                b_block_transfer=b_block_desc,
                c_block_transfer=c_block_desc,
            )
-            #manifest.append(new_operation)
            operations.append(new_operation)
    return operations
-    print (operations[0].tile_desc)