import os
import torch
from pathlib import Path
import aiter
import pandas as pd
from moe_problem import MoeProblem, get_dtype
from aiter.jit.utils.chip_info import get_gfx


class MoeTuner:
    """Queue MoE problem shapes and tune each one, recording results in a CSV.

    Shapes are queued with :meth:`add_moe`; shapes already present in the
    tuned-results file for the current GPU architecture are skipped.
    :meth:`find_best_sols` then runs the solution search for every queued
    shape and appends one row per shape to the tuned-results file.
    """

    def __init__(self, indtype, tuned_file=None, mp=1):
        """Create a tuner.

        Args:
            indtype: Input dtype stored on the instance as ``self.indtype``.
            tuned_file: Path of the CSV holding previously tuned shapes. When
                ``None`` (or the file is missing/empty) no shape is considered
                already tuned. When ``None``, results are printed but not
                written to disk.
            mp: Forwarded to ``MoeProblem`` as its ``mp`` keyword argument.
        """
        self.arch = get_gfx()
        # Columns that must hold integers in both the queue and the tuned CSV.
        self._int_cols = ["token", "inter_dim", "model_dim", "expert", "topk", "q_size_n", "q_size_k"]
        self.moe_pro_df = pd.DataFrame(columns=["quant_type", "indtype", *self._int_cols])
        self.indtype = indtype
        self.tuned_file = tuned_file
        self.mp = mp
        self.tuned_shapes = self._load_tuned_shapes(tuned_file)

    def _load_tuned_shapes(self, tuned_file):
        """Return the tuned-results CSV as a DataFrame, or ``None`` if unavailable."""
        if tuned_file is None or not Path(tuned_file).is_file():
            return None
        try:
            tuned = pd.read_csv(tuned_file).dropna(how="all").fillna(0)
        except pd.errors.EmptyDataError:
            # A zero-byte file carries no header and no rows: nothing is tuned yet.
            return None
        # read_csv may parse these as floats (e.g. after NaN filling); the
        # equality checks in add_moe compare them against ints.
        for col in self._int_cols:
            if col in tuned.columns:
                tuned[col] = tuned[col].astype(int)
        return tuned

    @staticmethod
    def _has_shape(df, key):
        """Return True when *df* has a row matching every ``column: value`` in *key*."""
        if df is None or df.empty:
            return False
        mask = pd.Series(True, index=df.index)
        for col, val in key.items():
            mask &= (df[col] == val)
        return bool(mask.any())

    def add_moe(self, quant_type, indtype, token, inter_dim, model_dim, expert, topk, q_size_n=0, q_size_k=0):
        """Queue one MoE shape for tuning unless it is already tuned or queued.

        Args:
            quant_type: Quantization type identifier, compared as-is against
                the ``quant_type`` column of the tuned file.
            indtype: Input dtype; stored and compared as ``str(indtype)``.
            token, inter_dim, model_dim, expert, topk: Shape parameters,
                converted with ``int()``.
            q_size_n, q_size_k: Additional integer shape parameters
                (default 0).
        """
        indtype_str = str(indtype)
        shape = {
            "quant_type": quant_type,
            "indtype": indtype_str,
            "token": int(token),
            "inter_dim": int(inter_dim),
            "model_dim": int(model_dim),
            "expert": int(expert),
            "topk": int(topk),
            "q_size_n": int(q_size_n),
            "q_size_k": int(q_size_k),
        }

        # Tuned results are per-architecture, so the arch is part of the key.
        if self._has_shape(self.tuned_shapes, {"arch": self.arch, **shape}):
            print(f">>>Info: Skipping already tuned shape: quant_type={quant_type}, indtype={indtype_str},"
                  f"token={token}, inter_dim={inter_dim}, model_dim={model_dim}, expert={expert}, topk={topk}, q_size_n={q_size_n}, q_size_k={q_size_k}")
            return

        # Avoid tuning (and writing) the same shape twice within one run.
        if self._has_shape(self.moe_pro_df, shape):
            print(f">>>Info: Skipping already queued shape: quant_type={quant_type}, indtype={indtype_str}, "
                  f"token={token}, inter_dim={inter_dim}, model_dim={model_dim}, expert={expert}, topk={topk}, q_size_n={q_size_n}, q_size_k={q_size_k}")
            return

        row = pd.DataFrame([shape])
        # Concatenating onto an empty frame is deprecated in pandas; start the
        # queue from the first row instead.
        if self.moe_pro_df.empty:
            self.moe_pro_df = row
        else:
            self.moe_pro_df = pd.concat([self.moe_pro_df, row], ignore_index=True)

    def find_best_sols(self):
        """Tune every queued shape and append one result row per shape to the CSV.

        For each queued shape a ``MoeProblem`` is built, its CK, ASM and
        Triton searches are run, the fastest solution is selected, and a row
        ``arch, <shape columns>, sol_type, sol_id, time_us`` is printed and
        appended to ``self.tuned_file``. When ``self.tuned_file`` is ``None``
        rows are only printed. Finally the full tuned file, if it exists, is
        printed.
        """
        persist = self.tuned_file is not None
        # A header is needed only when the file is missing or has no content.
        write_header = persist and not (
            os.path.exists(self.tuned_file) and os.path.getsize(self.tuned_file) > 0
        )

        for _, ds in self.moe_pro_df.iterrows():
            print(f">>>Info: Tuning Moe: quant_type={ds['quant_type']}, indtype={ds['indtype']}, "
                  f"token={ds['token']}, inter_dim={ds['inter_dim']}, model_dim={ds['model_dim']}, expert={ds['expert']}, topk={ds['topk']}, q_size_n={ds['q_size_n']}, q_size_k={ds['q_size_k']}")
            indtype_str = ds["indtype"]
            indtype = get_dtype(indtype_str)
            moe_obj = MoeProblem(
                ds["quant_type"],
                indtype,
                int(ds["token"]),
                int(ds["inter_dim"]),
                int(ds["model_dim"]),
                int(ds["expert"]),
                int(ds["topk"]),
                int(ds["q_size_n"]),
                int(ds["q_size_k"]),
                mp=self.mp,
            )
            # 1. find best solutions in CK
            moe_obj.find_ck_solutions(fast_mode=1)
            # 2. find best solutions in ASM
            moe_obj.find_asm_solutions(fast_mode=1)
            # 3. find best solutions in Triton
            moe_obj.find_triton_solutions(fast_mode=1)
            # 4. pick the fastest one
            moe_obj.find_fastest_solution()

            # 5. write to csv (column order: arch, shape columns, solution columns)
            current_row = pd.DataFrame([{
                "arch": self.arch,
                **ds.to_dict(),
                "sol_type": moe_obj.sol_type,
                "sol_id": moe_obj.sol_id,
                "time_us": moe_obj.time_us,
            }])
            print(current_row)
            if persist:
                # Append after every shape so finished results survive a
                # failure on a later shape.
                current_row.to_csv(
                    self.tuned_file,
                    mode='a',
                    header=write_header,
                    index=False
                )
                write_header = False

            # Drop the problem object before asking torch to release cached memory.
            del moe_obj
            torch.cuda.empty_cache()

        # Nothing may have been written (empty queue, no prior file).
        if persist and Path(self.tuned_file).is_file() and os.path.getsize(self.tuned_file) > 0:
            finaldf = pd.read_csv(self.tuned_file)
            print(finaldf)