"platforms/vscode:/vscode.git/clone" did not exist on "4ab645ea6911bf29e9ec6fd13b50ba1e196cbc40"
moe_tuner.py 4.88 KB
Newer Older
Xiaowei.zhang's avatar
Xiaowei.zhang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
import os
import torch
from pathlib import Path

import aiter
import pandas as pd
from moe_problem import MoeProblem,get_dtype
from aiter.jit.utils.chip_info import get_gfx

class MoeTuner:
    """Queue MoE problem shapes and record the fastest solution found for each.

    Shapes are queued with :meth:`add_moe` and tuned with
    :meth:`find_best_sols`. When ``tuned_file`` names a CSV file, shapes
    already listed there for the current arch are skipped and new results
    are appended to it.
    """

    # Columns that describe one queued problem, in queue/CSV order.
    _SHAPE_COLS = (
        "quant_type", "indtype", "token", "inter_dim", "model_dim",
        "expert", "topk", "q_size_n", "q_size_k",
    )
    # Subset of the shape columns that hold integers.
    _INT_COLS = (
        "token", "inter_dim", "model_dim", "expert", "topk",
        "q_size_n", "q_size_k",
    )

    def __init__(self, indtype, tuned_file=None, mp=1):
        """Create a tuner.

        Args:
            indtype: Stored on the instance as ``self.indtype``.
            tuned_file: Optional path of the CSV holding earlier results.
                ``None`` disables both the lookup and the CSV output.
            mp: Forwarded as ``mp=`` to every ``MoeProblem``.
        """
        self.arch = get_gfx()
        self.moe_pro_df = pd.DataFrame(columns=list(self._SHAPE_COLS))
        self._int_cols = list(self._INT_COLS)
        self.indtype = indtype
        self.tuned_file = tuned_file
        self.mp = mp
        self.tuned_shapes = self._load_tuned_shapes(tuned_file)

    @staticmethod
    def _file_has_content(path):
        """Return True when ``path`` is a regular file larger than zero bytes."""
        candidate = Path(path)
        return candidate.is_file() and candidate.stat().st_size > 0

    def _load_tuned_shapes(self, tuned_file):
        """Read previously tuned shapes from ``tuned_file``.

        Returns:
            A DataFrame with fully empty rows dropped, remaining NaNs
            replaced by 0 and the integer columns cast to ``int``; or
            ``None`` when ``tuned_file`` is ``None``, is not a regular file,
            or is zero bytes long.
        """
        if tuned_file is None or not self._file_has_content(tuned_file):
            return None
        shapes = pd.read_csv(tuned_file).dropna(how='all').fillna(0)
        for col in self._INT_COLS:
            # Tolerate files that lack some of the integer columns.
            if col in shapes.columns:
                shapes[col] = shapes[col].astype(int)
        return shapes

    @staticmethod
    def _has_shape(table, criteria):
        """Return True when some row of ``table`` equals every ``criteria`` item.

        ``criteria`` maps column name to required value and must be non-empty.
        A column missing from ``table`` raises ``KeyError``.
        """
        mask = None
        for col, wanted in criteria.items():
            hit = table[col] == wanted
            mask = hit if mask is None else mask & hit
        return bool(mask.any())

    def add_moe(self, quant_type, indtype, token, inter_dim, model_dim, expert, topk, q_size_n=0, q_size_k=0):
        """Queue one problem shape unless it is already tuned or already queued.

        ``indtype`` is stored as ``str(indtype)`` and the size arguments are
        cast to ``int``. The same normalized values are used for the lookups,
        so an argument such as ``"16"`` matches a stored ``16``.
        """
        shape = {
            "quant_type": quant_type,
            "indtype": str(indtype),
            "token": int(token),
            "inter_dim": int(inter_dim),
            "model_dim": int(model_dim),
            "expert": int(expert),
            "topk": int(topk),
            "q_size_n": int(q_size_n),
            "q_size_k": int(q_size_k),
        }
        desc = ", ".join(f"{key}={value}" for key, value in shape.items())

        # Rows of the tuned file only count when they were produced on this arch.
        if self.tuned_shapes is not None and self._has_shape(
            self.tuned_shapes, {"arch": self.arch, **shape}
        ):
            print(f">>>Info: Skipping already tuned shape: {desc}")
            return

        # Avoid tuning the same shape twice within one run.
        if self._has_shape(self.moe_pro_df, shape):
            print(f">>>Info: Skipping shape already queued for tuning: {desc}")
            return

        new_row = pd.DataFrame({key: [value] for key, value in shape.items()})
        self.moe_pro_df = pd.concat([self.moe_pro_df, new_row], ignore_index=True)

    def find_best_sols(self):
        """Tune every queued shape and report the fastest solution for each.

        For each queued row a ``MoeProblem`` is built, its CK, ASM and Triton
        searches are run, and the fastest solution is selected. The result
        row (arch, shape columns, ``sol_type``, ``sol_id``, ``time_us``) is
        printed and, when ``self.tuned_file`` is set, appended to that CSV.
        A header is written only if the file is missing or empty beforehand.

        At the end the whole CSV is printed; without a ``tuned_file`` the
        results gathered in this call are printed instead. Returns ``None``.
        """
        out_path = self.tuned_file
        write_header = out_path is not None and not self._file_has_content(out_path)
        unsaved_rows = []

        for _, ds in self.moe_pro_df.iterrows():
            shape_desc = ", ".join(f"{col}={ds[col]}" for col in self._SHAPE_COLS)
            print(f">>>Info: Tuning Moe: {shape_desc}")

            moe_obj = MoeProblem(
                ds["quant_type"],
                get_dtype(ds["indtype"]),
                int(ds["token"]),
                int(ds["inter_dim"]),
                int(ds["model_dim"]),
                int(ds["expert"]),
                int(ds["topk"]),
                int(ds["q_size_n"]),
                int(ds["q_size_k"]),
                mp=self.mp,
            )

            # 1. find best solutions in CK
            moe_obj.find_ck_solutions(fast_mode=1)

            # 2. find best solutions in ASM
            moe_obj.find_asm_solutions(fast_mode=1)

            # 3. find best solutions in Triton
            moe_obj.find_triton_solutions(fast_mode=1)

            # 4. pick the fastest one
            moe_obj.find_fastest_solution()

            # 5. assemble the result row: arch, shape columns, then the solution
            record = {
                "arch": self.arch,
                **ds.to_dict(),
                "sol_type": moe_obj.sol_type,
                "sol_id": moe_obj.sol_id,
                "time_us": moe_obj.time_us,
            }
            current_row = pd.DataFrame({key: [value] for key, value in record.items()})
            print(current_row)

            if out_path is None:
                unsaved_rows.append(current_row)
            else:
                # Append per shape so finished results survive a later failure.
                current_row.to_csv(out_path, mode='a', header=write_header, index=False)
                write_header = False

            del moe_obj
            torch.cuda.empty_cache()

        if out_path is not None and self._file_has_content(out_path):
            print(pd.read_csv(out_path))
        elif unsaved_rows:
            print(pd.concat(unsaved_rows, ignore_index=True))