# cibench.py


# Libraries covered by the generation split, in the order their subsets are listed.
_cibench_generation_modules = [
    'pandas',
    'matplotlib',
    'opencv',
    'scipy',
    'seaborn',
    'pytorch',
]
_cibench_generation = [f'cibench_generation/{module}' for module in _cibench_generation_modules]
cibench_summary_groups = []
# Four weight columns per library: column 0 is used for both `tool_rate` and
# `executable`; columns 1, 2 and 3 are used for `numeric_correct`,
# `text_score` and `vis_sim` respectively.
_cibench_generation_weight = dict(
    matplotlib=[223, 50, 1, 156],
    pandas=[200, 45, 45, 38],
    pytorch=[69, 0, 8, 11],
    seaborn=[130, 0, 2, 106],
    opencv=[177, 21, 6, 106],
    scipy=[161, 94, 14, 49],
)
# One summary group per metric; each picks its own weight column.
cibench_summary_groups.extend(
    {
        'name': f'cibench_generation:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_generation],
        'weights': {
            f'cibench_generation/{module}': columns[column]
            for module, columns in _cibench_generation_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
)

# Oracle variant of the generation split. Note that this rebinds
# `_cibench_generation`, so from here on it lists the oracle subsets.
_cibench_generation = [f'cibench_generation_oracle/{module}' for module in _cibench_generation_modules]
# Same metric -> weight-column mapping as the non-oracle generation groups,
# reusing `_cibench_generation_weight`.
cibench_summary_groups.extend(
    {
        'name': f'cibench_generation_oracle:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_generation],
        'weights': {
            f'cibench_generation_oracle/{module}': columns[column]
            for module, columns in _cibench_generation_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
)

# Libraries covered by the template split, in the order their subsets are listed.
_cibench_template_modules = [
    'lightgbm',
    'matplotlib',
    'nltk',
    'opencv',
    'pandas',
    'pytorch',
    'scipy',
    'seaborn',
    'sklearn',
    'tensorflow',
]
_cibench_template = [f'cibench_template/{module}' for module in _cibench_template_modules]
# Four weight columns per library (noted in this file as question counts per
# module): column 0 is used for both `tool_rate` and `executable`; columns 1,
# 2 and 3 are used for `numeric_correct`, `text_score` and `vis_sim`.
_cibench_template_weight = dict(
    lightgbm=[30, 15, 0, 0],
    matplotlib=[42, 0, 0, 36],
    nltk=[70, 30, 20, 10],
    opencv=[60, 10, 0, 40],
    pandas=[60, 40, 0, 10],
    pytorch=[28, 0, 0, 0],
    scipy=[60, 40, 0, 0],
    seaborn=[42, 0, 0, 35],
    sklearn=[42, 6, 0, 18],
    tensorflow=[36, 6, 0, 12],
)
# One summary group per metric; each picks its own weight column.
cibench_summary_groups.extend(
    {
        'name': f'cibench_template:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_template],
        'weights': {
            f'cibench_template/{module}': columns[column]
            for module, columns in _cibench_template_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
)

# Oracle variant of the template split; weights come from the shared
# `_cibench_template_weight` table.
_cibench_template_oracle = [f'cibench_template_oracle/{module}' for module in _cibench_template_modules]
cibench_summary_groups.extend(
    {
        'name': f'cibench_template_oracle:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_template_oracle],
        'weights': {
            f'cibench_template_oracle/{module}': columns[column]
            for module, columns in _cibench_template_weight.items()
        },
    }
    # metric name paired with the weight column it reads
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
)


## chinese
# Libraries covered by the Chinese template split (same entries as the
# template module list, kept as a separate list).
_cibench_template_cn_modules = [
    'lightgbm',
    'matplotlib',
    'nltk',
    'opencv',
    'pandas',
    'pytorch',
    'scipy',
    'seaborn',
    'sklearn',
    'tensorflow',
]
_cibench_template_cn = [f'cibench_template_chinese/{module}' for module in _cibench_template_cn_modules]
# Group names use the short `cibench_template_cn` label while subsets and
# weight keys use the `cibench_template_chinese/` prefix; weights are taken
# from `_cibench_template_weight`.
cibench_summary_groups.extend(
    {
        'name': f'cibench_template_cn:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_template_cn],
        'weights': {
            f'cibench_template_chinese/{module}': columns[column]
            for module, columns in _cibench_template_weight.items()
        },
    }
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
)

# Oracle variant of the Chinese template split. Group names use the
# `cibench_template_cn_oracle` label; subsets and weight keys use the
# `cibench_template_oracle_chinese/` prefix.
_cibench_template_cn_oracle = [
    f'cibench_template_oracle_chinese/{module}' for module in _cibench_template_cn_modules
]
cibench_summary_groups.extend(
    {
        'name': f'cibench_template_cn_oracle:{metric}',
        'subsets': [[subset, metric] for subset in _cibench_template_cn_oracle],
        'weights': {
            f'cibench_template_oracle_chinese/{module}': columns[column]
            for module, columns in _cibench_template_weight.items()
        },
    }
    # metric name paired with the weight column it reads
    for metric, column in (
        ('tool_rate', 0),
        ('executable', 0),
        ('numeric_correct', 1),
        ('text_score', 2),
        ('vis_sim', 3),
    )
)


########### New summarizer for Category metric

# Category tables. Every row is [subset, metric, weight]; each subset
# contributes three rows (numeric_correct, text_score, vis_sim) whose weights
# come from columns 1, 2 and 3 of the matching weight table.
cibench_data_manipulation = [
    [f'{split}/pandas', metric, table['pandas'][column]]
    for split, table in (
        ('cibench_generation', _cibench_generation_weight),
        ('cibench_template', _cibench_template_weight),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_data_visualization = [
    [f'{split}/{module}', metric, table[module][column]]
    for split, table in (
        ('cibench_generation', _cibench_generation_weight),
        ('cibench_template', _cibench_template_weight),
    )
    for module in ('matplotlib', 'seaborn')
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_modeling = [
    [f'{split}/{module}', metric, table[module][column]]
    for split, table, modules in (
        ('cibench_generation', _cibench_generation_weight, ('pytorch',)),
        ('cibench_template', _cibench_template_weight, ('pytorch', 'sklearn', 'tensorflow', 'lightgbm')),
    )
    for module in modules
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_nlp = [
    ['cibench_template/nltk', metric, _cibench_template_weight['nltk'][column]]
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_ip = [
    [f'{split}/opencv', metric, table['opencv'][column]]
    for split, table in (
        ('cibench_generation', _cibench_generation_weight),
        ('cibench_template', _cibench_template_weight),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_math = [
    [f'{split}/scipy', metric, table['scipy'][column]]
    for split, table in (
        ('cibench_generation', _cibench_generation_weight),
        ('cibench_template', _cibench_template_weight),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
# One `<category>:scores` group per category table. Subsets are the
# [subset, metric] pairs of each row; weights are keyed by 'subset@metric'
# and take the row's last element.
cibench_summary_groups.extend(
    {
        'name': f'{category}:scores',
        'subsets': [row[:2] for row in rows],
        'weights': {f'{row[0]}@{row[1]}': row[-1] for row in rows},
    }
    for category, rows in (
        ('cibench_data_manipulation', cibench_data_manipulation),
        ('cibench_data_visualization', cibench_data_visualization),
        ('cibench_modeling', cibench_modeling),
        ('cibench_nlp', cibench_nlp),
        ('cibench_ip', cibench_ip),
        ('cibench_math', cibench_math),
    )
)


########### New summarizer for Category metric oracle

# Oracle category tables. These rebind the same public names as the non-oracle
# tables, so after this point each name refers to its oracle rows. Every row is
# [subset, metric, weight]; each subset contributes three rows
# (numeric_correct, text_score, vis_sim) weighted by columns 1, 2 and 3 of the
# matching (non-oracle) weight table.
cibench_data_manipulation = [
    [f'{split}/pandas', metric, table['pandas'][column]]
    for split, table in (
        ('cibench_generation_oracle', _cibench_generation_weight),
        ('cibench_template_oracle', _cibench_template_weight),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_data_visualization = [
    [f'{split}/{module}', metric, table[module][column]]
    for split, table in (
        ('cibench_generation_oracle', _cibench_generation_weight),
        ('cibench_template_oracle', _cibench_template_weight),
    )
    for module in ('matplotlib', 'seaborn')
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_modeling = [
    [f'{split}/{module}', metric, table[module][column]]
    for split, table, modules in (
        ('cibench_generation_oracle', _cibench_generation_weight, ('pytorch',)),
        ('cibench_template_oracle', _cibench_template_weight, ('pytorch', 'sklearn', 'tensorflow', 'lightgbm')),
    )
    for module in modules
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_nlp = [
    ['cibench_template_oracle/nltk', metric, _cibench_template_weight['nltk'][column]]
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_ip = [
    [f'{split}/opencv', metric, table['opencv'][column]]
    for split, table in (
        ('cibench_generation_oracle', _cibench_generation_weight),
        ('cibench_template_oracle', _cibench_template_weight),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
cibench_math = [
    [f'{split}/scipy', metric, table['scipy'][column]]
    for split, table in (
        ('cibench_generation_oracle', _cibench_generation_weight),
        ('cibench_template_oracle', _cibench_template_weight),
    )
    for column, metric in enumerate(('numeric_correct', 'text_score', 'vis_sim'), start=1)
]
# One `<category>_oracle:scores` group per category table, built from whatever
# the category names are bound to at this point in the file. Subsets are the
# [subset, metric] pairs of each row; weights are keyed by 'subset@metric'
# and take the row's last element.
cibench_summary_groups.extend(
    {
        'name': f'{category}_oracle:scores',
        'subsets': [row[:2] for row in rows],
        'weights': {f'{row[0]}@{row[1]}': row[-1] for row in rows},
    }
    for category, rows in (
        ('cibench_data_manipulation', cibench_data_manipulation),
        ('cibench_data_visualization', cibench_data_visualization),
        ('cibench_modeling', cibench_modeling),
        ('cibench_nlp', cibench_nlp),
        ('cibench_ip', cibench_ip),
        ('cibench_math', cibench_math),
    )
)