Commit ceb7915b authored by sharkgene@qq.com's avatar sharkgene@qq.com
Browse files

do exception for non-numeric

parent 10bb8437
File added
......@@ -9,11 +9,13 @@
"模型": []
},
"distinguish": ["模型", "卡类型", "卡数"],
"group_by": [["并发数"], ["输入长度(tokens)", "输出长度(tokens)"]],
"dist_cols": ["模型", "卡数"],
"group_cols": [["并发数"], ["输入长度(tokens)", "输出长度(tokens)"]],
"key_cols": ["卡类型", "vLLM版本", "V0/V1 Engine"],
"metric_cols": ["平均首字延时TTFT(ms)", "平均生成时间TPOT(ms)", "生成吞吐量(tokens/s)", "总吞吐量(tokens/s)"],
"files": [
{
"file": "test100.xlsx",
"file": "t.xlsx",
"sheets": [],
"column_mapping": {
},
......@@ -24,12 +26,16 @@
}
filter: 过滤条件,不设置或者为空则不过滤
distinguish: 选择输出文件的条件,例如["模型", "卡类型", "卡数"]则将相同的模型、卡类型和卡数的所有数据生成一个文件
group_by: 可以进行最多2重分组,每层一个图,第2层以不同柱状显示数据。如果命令行参数指定合并,则所有合成一个图,分组之间间隔开
dist_cols: 选择输出文件的条件,例如["模型", "卡类型", "卡数"]则将相同的模型、卡类型和卡数的所有数据生成一个文件
group_cols: 可以进行最多2重分组,每层一个图,第2层以不同柱状显示数据。如果命令行参数指定合并,则所有合成一个图,分组之间间隔开
key_cols: 分组内的键值
metric_cols: 度量列,不设置则默认使用上述4个;会将所有非数值内容转换为0
files: 指定文件信息, 可以使用多个文件
file: 文件名
sheets: 可以指定处理的sheet,不指定则遍历所有
column_mapping: 列名重映射,用于处理多个文件时表头信息略有差异的情况
参考配置说明:
```
## 使用方法
```
......
......@@ -4,9 +4,10 @@
"模型": []
},
"distinguish": ["模型", "卡数"],
"group_by": [["并发数"], ["输入长度(tokens)", "输出长度(tokens)"]],
"colkey": ["卡类型", "vLLM版本", "V0/V1 Engine"],
"dist_cols": ["模型", "卡数"],
"group_cols": [["并发数"], ["输入长度(tokens)", "输出长度(tokens)"]],
"key_cols": ["卡类型", "vLLM版本", "V0/V1 Engine"],
"metric_cols": ["平均首字延时TTFT(ms)", "平均生成时间TPOT(ms)", "生成吞吐量(tokens/s)", "总吞吐量(tokens/s)"],
"files": [
{
"file": "t.xlsx",
......
......@@ -270,30 +270,31 @@ df_renamed = apply_filter(df_renamed, filter_config)
print(f"过滤后数据量: {len(df_renamed)}")
metric_cols = [
'平均首字延时TTFT(ms)',
'平均生成时间TPOT(ms)',
'生成吞吐量(tokens/s)',
'总吞吐量(tokens/s)'
]
dist_cols_config = config.get('distinguish', ['模型', '卡数'])
dist_cols_config = config.get('dist_cols', ['模型', '卡数'])
dist_cols = [col_mapping.get(c, c) for c in dist_cols_config]
dist_cols = [c for c in dist_cols if c in df_renamed.columns]
os.makedirs(args.输出目录, exist_ok=True)
group_by = config.get('group_by', [[], []])
if isinstance(group_by[0], list):
outer_group = group_by[0] if len(group_by) > 0 else []
inner_group = group_by[1] if len(group_by) > 1 else []
group_cols = config.get('group_cols', [[], []])
if isinstance(group_cols[0], list):
outer_group = group_cols[0] if len(group_cols) > 0 else []
inner_group = group_cols[1] if len(group_cols) > 1 else []
else:
outer_group = []
inner_group = group_by
colkey = config.get('colkey', [])
if len(colkey) == 0:
inner_group = group_cols
key_cols = config.get('key_cols', [])
if len(key_cols) == 0:
print(f"column key error")
metric_cols = config.get('metric_cols', [
'平均首字延时TTFT(ms)',
'平均生成时间TPOT(ms)',
'生成吞吐量(tokens/s)',
'总吞吐量(tokens/s)'
])
dist_combinations = df_renamed.groupby(dist_cols).size().reset_index()
print(f"\n将生成 {len(dist_combinations)} 个图表...")
......@@ -320,7 +321,7 @@ for idx, (_, dist_row) in enumerate(dist_combinations.iterrows()):
# df_subset[c] = df_subset[c].astype('float64')
#except Exception as e:
# print(f"数据转换错误, 列名{c}, 错误信息{e}")
success = generate_chart(df_subset, output_path, colkey, outer_group, inner_group, metric_cols, args.合并分组)
success = generate_chart(df_subset, output_path, key_cols, outer_group, inner_group, metric_cols, args.合并分组)
if success:
chart_count += 1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment