moe_tuner.py 4.47 KB
Newer Older
Xiaowei.zhang's avatar
Xiaowei.zhang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import torch
from pathlib import Path

import aiter
import pandas as pd
from moe_problem import MoeProblem,get_dtype
from aiter.jit.utils.chip_info import get_gfx

class MoeTuner:
    """Queue MoE problem shapes and tune the ones that are not tuned yet.

    Shapes are registered with :meth:`add_moe`; :meth:`find_best_sols` then
    tunes every queued shape and appends one result row per shape to the
    CSV file ``tuned_file``.

    Attributes:
        arch: Value returned by ``get_gfx()`` when the tuner was created.
        moe_pro_df: DataFrame of queued shapes, one row per pending problem.
        indtype: Stored as given; not read by the methods of this class.
        tuned_file: Path of the results CSV, or ``None`` to disable both the
            "already tuned" lookup and result persistence.
        mp: Forwarded unchanged to ``MoeProblem`` as ``mp=``.
        tuned_shapes: Contents of ``tuned_file`` (NaN replaced by ``""``), or
            ``None`` when there is no readable file.
    """

    # Columns identifying one MoE problem; the order defines the queue layout
    # and, together with "arch" and the solution columns, the CSV layout.
    _SHAPE_COLUMNS = [
        "quant_type",
        "indtype",
        "token",
        "inter_dim",
        "model_dim",
        "expert",
        "topk",
        "q_size_n",
        "q_size_k",
    ]

    def __init__(self, indtype, tuned_file=None, mp=1):
        """Create a tuner and load previously tuned shapes, if any.

        Args:
            indtype: Kept on the instance as ``self.indtype``.
            tuned_file: Path of the results CSV. May be ``None`` or point to
                a file that does not exist yet.
            mp: Passed through to every ``MoeProblem`` created while tuning.
        """
        self.arch = get_gfx()
        self.moe_pro_df = pd.DataFrame(columns=self._SHAPE_COLUMNS)
        self.indtype = indtype
        self.tuned_file = tuned_file
        self.mp = mp

        # ``Path(None)`` raises TypeError, so the documented default must be
        # handled explicitly; an empty file is treated like a missing one.
        previous = self._read_tuned_file(tuned_file)
        self.tuned_shapes = None if previous is None else previous.fillna("")

    @staticmethod
    def _read_tuned_file(tuned_file):
        """Return the CSV at ``tuned_file`` as a DataFrame, or ``None``.

        ``None`` is returned when no path was given, when the path is not a
        regular file, or when the file holds no parseable CSV content.
        """
        if tuned_file is None or not Path(tuned_file).is_file():
            return None
        try:
            return pd.read_csv(tuned_file)
        except pd.errors.EmptyDataError:
            return None

    def _is_tuned(self, shape):
        """Return True when ``shape`` already has a row for this arch."""
        tuned = self.tuned_shapes
        if tuned is None:
            return False
        mask = tuned["arch"] == self.arch
        for column, value in shape.items():
            mask &= tuned[column] == value
        return bool(mask.any())

    def add_moe(self, quant_type, indtype, token, inter_dim, model_dim, expert, topk, q_size_n=0, q_size_k=0):
        """Queue one MoE problem unless it is already present in the tuned file.

        ``indtype`` is stored and compared as ``str(indtype)``. All other
        arguments are stored and compared as given.
        NOTE(review): ``quant_type`` is compared against the CSV column without
        conversion — confirm callers pass the same representation the CSV holds.
        """
        indtype_str = str(indtype)
        shape = {
            "quant_type": quant_type,
            "indtype": indtype_str,
            "token": token,
            "inter_dim": inter_dim,
            "model_dim": model_dim,
            "expert": expert,
            "topk": topk,
            "q_size_n": q_size_n,
            "q_size_k": q_size_k,
        }

        if self._is_tuned(shape):
            print(f">>>Info: Skipping already tuned shape: quant_type={quant_type}, indtype={indtype_str},"
                  f"token={token}, inter_dim={inter_dim}, model_dim={model_dim}, expert={expert}, topk={topk}, q_size_n={q_size_n}, q_size_k={q_size_k}")
            return

        new_row = pd.DataFrame({column: [value] for column, value in shape.items()})
        if self.moe_pro_df.empty:
            # Concatenating onto an empty frame is deprecated in pandas and
            # would eventually force every column to object dtype.
            self.moe_pro_df = new_row
        else:
            self.moe_pro_df = pd.concat([self.moe_pro_df, new_row], ignore_index=True)

    def find_best_sols(self):
        """Tune every queued shape and append the results to ``tuned_file``.

        For each queued row a ``MoeProblem`` is built, its CK, ASM and Triton
        searches are run, the fastest solution is selected, and one CSV row
        (arch, shape columns, ``sol_type``, ``sol_id``, ``time_us``) is
        appended. When ``tuned_file`` is ``None`` the rows are only printed.
        The whole results file is printed at the end when it exists.
        """
        persist = self.tuned_file is not None
        if not persist:
            print(">>>Warning: tuned_file is not set; results are printed but not saved")

        # A header is needed whenever the file has no readable content yet
        # (missing or empty), not only when the file is missing.
        write_header = persist and self._read_tuned_file(self.tuned_file) is None

        for _, ds in self.moe_pro_df.iterrows():
            print(f">>>Info: Tuning Moe: quant_type={ds['quant_type']}, indtype={ds['indtype']}, "
                  f"token={ds['token']}, inter_dim={ds['inter_dim']}, model_dim={ds['model_dim']}, expert={ds['expert']}, topk={ds['topk']}, q_size_n={ds['q_size_n']}, q_size_k={ds['q_size_k']}")

            # The queue stores the dtype as a string; convert it back.
            indtype = get_dtype(ds["indtype"])

            moe_obj = MoeProblem(
                ds["quant_type"],
                indtype,
                int(ds["token"]),
                int(ds["inter_dim"]),
                int(ds["model_dim"]),
                int(ds["expert"]),
                int(ds["topk"]),
                int(ds["q_size_n"]),
                int(ds["q_size_k"]),
                mp=self.mp,
            )

            # 1. find best solutions in CK
            moe_obj.find_ck_solutions(fast_mode=1)

            # 2. find best solutions in ASM
            moe_obj.find_asm_solutions(fast_mode=1)

            # 3. find best solutions in Triton
            moe_obj.find_triton_solutions(fast_mode=1)

            # 4. pick the fastest one
            moe_obj.find_fastest_solution()

            # 5. write to csv (one row: arch, shape columns, solution columns)
            row = {"arch": self.arch}
            row.update(ds.to_dict())
            row["sol_type"] = moe_obj.sol_type
            row["sol_id"] = moe_obj.sol_id
            row["time_us"] = moe_obj.time_us
            current_row = pd.DataFrame([row])
            print(current_row)
            if persist:
                # Appending per shape keeps finished results if a later
                # shape fails.
                current_row.to_csv(
                    self.tuned_file,
                    mode='a',
                    header=write_header,
                    index=False
                )
                write_header = False

            del moe_obj
            torch.cuda.empty_cache()

        # Nothing to show when no file was configured or nothing was written.
        finaldf = self._read_tuned_file(self.tuned_file) if persist else None
        if finaldf is not None:
            print(finaldf)