"...git@developer.sourcefind.cn:cnjsdfcy/simbricks.git" did not exist on "50ba6ab041684ef820d07fb6538e1ce2d34e1e67"
Commit 7d346000 authored by gaotongxiao's avatar gaotongxiao
Browse files

initial commit

parents
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
summedits_reader_cfg = dict(
input_columns=['doc', 'summary'],
output_column='label',
test_split='train')
summedits_prompt = """
Given the document below, you have to determine if "Yes" or "No", the summary is factually consistent with the document.
Document:
{doc}
Summary:
{summary}
Is the summary factually consistent with the document?
"""
summedits_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: f"{summedits_prompt}Answer: No.",
1: f"{summedits_prompt}Answer: Yes."
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
summedits_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
summedits_datasets = [
dict(
type=HFDataset,
abbr='summedits',
path='json',
split='train',
data_files='./data/summedits/summedits.jsonl',
reader_cfg=summedits_reader_cfg,
infer_cfg=summedits_infer_cfg,
eval_cfg=summedits_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import SummScreenDataset
summscreen_reader_cfg = dict(
input_columns='content',
output_column='summary',
train_split='dev',
test_split='dev')
summscreen_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role="HUMAN",
prompt=
'Please summarize the following English play script in English:'
),
],
round=[
dict(role='HUMAN', prompt='{content}'),
dict(role='BOT', prompt='{summary}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))
summscreen_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
summscreen_datasets = [
dict(
type=SummScreenDataset,
path='./data/SummScreen/',
abbr='SummScreen',
reader_cfg=summscreen_reader_cfg,
infer_cfg=summscreen_infer_cfg,
eval_cfg=summscreen_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TruthfulQADataset, TruthfulQAEvaluator
truthfulqa_reader_cfg = dict(
input_columns=['question'],
output_column='reference',
train_split='validation',
test_split='validation')
# TODO: allow empty output-column
truthfulqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{question}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
# Metrics such as 'truth' and 'info' needs
# OPENAI_API_KEY with finetuned models in it.
# Please use your own finetuned openai model with keys and refers to
# the source code for more details
# Metrics such as 'bleurt', 'rouge', 'bleu' are free to test
# When key is set to "ENV", the key will be fetched from the environment
# variable $OPENAI_API_KEY. Otherwise, set key in here directly.
truthfulqa_eval_cfg = dict(
evaluator=dict(
type=TruthfulQAEvaluator, metrics=('truth', 'info'), key='ENV'), )
truthfulqa_datasets = [
dict(
type=TruthfulQADataset,
path='truthful_qa',
name='generation',
reader_cfg=truthfulqa_reader_cfg,
infer_cfg=truthfulqa_infer_cfg,
eval_cfg=truthfulqa_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import winogradDataset
winograd_reader_cfg = dict(
input_columns=['prompt', 'pronoun', 'opt1', 'opt2'],
output_column='label',
train_split='test',
test_split='test')
winograd_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
i: dict(round=[
dict(
role="HUMAN",
prompt=
f"{{prompt}} Q: In the previous text, what does '{{pronoun}}' refer to? A: {{opt{i+1}}}"
), # noqa
])
for i in range(2)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
winograd_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
winograd_datasets = [
dict(
abbr='winograd',
type=winogradDataset,
path='winograd_wsc',
name='wsc285',
reader_cfg=winograd_reader_cfg,
infer_cfg=winograd_infer_cfg,
eval_cfg=winograd_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import winograndeDataset_V2
winogrande_reader_cfg = dict(
input_columns=["opt1", "opt2"],
output_column="label",
test_split="validation")
winogrande_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Which of the following is a good sentence:\nA. {opt1}\nB. {opt2}\nAnswer:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
winogrande_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
winogrande_datasets = [
dict(
abbr="winogrande",
type=winograndeDataset_V2,
path="winogrande",
name="winogrande_xs",
reader_cfg=winogrande_reader_cfg,
infer_cfg=winogrande_infer_cfg,
eval_cfg=winogrande_eval_cfg,
)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset
z_bench_reader_cfg = dict(
input_columns=['text'], output_column='category', train_split='test')
z_bench_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{text}',
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
z_bench_dataset = dict(
type=HFDataset,
path=
'/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench',
data_dir=
'/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench',
name='question',
reader_cfg=z_bench_reader_cfg,
infer_cfg=z_bench_infer_cfg)
from opencompass.models import OpenAI
models = [
dict(abbr='GPT-3.5-turbo',
type=OpenAI, path='gpt-3.5-turbo', key='sk-xxx',
max_out_len=2048, max_seq_len=2048, batch_size=1)
]
from mmengine.config import read_base
with read_base():
from .groups.agieval import agieval_summary_groups
from .groups.mmlu import mmlu_summary_groups
from .groups.ceval import ceval_summary_groups
from .groups.bbh import bbh_summary_groups
from .groups.GaokaoBench import GaokaoBench_summary_groups
from .groups.flores import flores_summary_groups
summarizer = dict(
summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []),
prompt_db=dict(
database_path='configs/datasets/log.json',
config_dir='configs/datasets',
blacklist='.promptignore')
)
GaokaoBench_summary_groups = []
# gaokao-bench
_GaokaoBench_weights = {'2010-2022_Math_II_MCQs': 1090, '2010-2022_Math_I_MCQs': 1070, '2010-2022_History_MCQs': 1148, '2010-2022_Biology_MCQs': 900, '2010-2022_Political_Science_MCQs': 1280, '2010-2022_Physics_MCQs': 384, '2010-2022_Chemistry_MCQs': 744, '2010-2013_English_MCQs': 105, '2010-2022_Chinese_Modern_Lit': 261, '2010-2022_English_Fill_in_Blanks': 900.0, '2012-2022_English_Cloze_Test': 260, '2010-2022_Geography_MCQs': 380, '2010-2022_English_Reading_Comp': 940, '2010-2022_Chinese_Lang_and_Usage_MCQs': 240}
GaokaoBench_summary_groups.append({'name': 'GaokaoBench', 'subsets': list(_GaokaoBench_weights.keys()), 'weights': _GaokaoBench_weights})
var collapsedSections = ['Advanced Guides', 'Tools', 'User Guides', 'Notes'];
$(document).ready(function () {
$('.model-summary').DataTable({
"stateSave": false,
"lengthChange": false,
"pageLength": 20,
"order": []
});
});
# flake8: noqa
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import subprocess
import sys
import pytorch_sphinx_theme
from sphinx.builders.html import StandaloneHTMLBuilder
sys.path.insert(0, os.path.abspath('../../'))
# -- Project information -----------------------------------------------------
project = 'OpenCompass'
copyright = '2023, OpenCompass'
author = 'OpenCompass Authors'
# The full version, including alpha/beta/rc tags
# version_file = '../../opencompass/version.py'
# def get_version():
# with open(version_file, 'r') as f:
# exec(compile(f.read(), version_file, 'exec'))
# return locals()['__version__']
release = '1.0.0'
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.intersphinx',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'myst_parser',
'sphinx_copybutton',
'sphinx_tabs.tabs',
'notfound.extension',
'sphinxcontrib.jquery',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
'.rst': 'restructuredtext',
'.md': 'markdown',
}
language = 'en'
# The master toctree document.
root_doc = 'index'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'pytorch_sphinx_theme'
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# yapf: disable
html_theme_options = {
'menu': [
{
'name': 'GitHub',
'url': 'https://github.com/opencompass'
},
],
# Specify the language of shared menu
'menu_lang': 'en',
# Disable the default edit on GitHub
'default_edit_on_github': False,
}
# yapf: enable
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = [
'https://cdn.datatables.net/v/bs4/dt-1.12.1/datatables.min.css',
'css/readthedocs.css'
]
html_js_files = [
'https://cdn.datatables.net/v/bs4/dt-1.12.1/datatables.min.js',
'js/custom.js'
]
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'opencompassdoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(root_doc, 'opencompass.tex', 'OpenCompass Documentation', author,
'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author],
1)]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(root_doc, 'opencompass', 'OpenCompass Documentation', author,
'OpenCompass Authors', 'AGI evaluation toolbox and benchmark.',
'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# set priority when building html
StandaloneHTMLBuilder.supported_image_types = [
'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg'
]
# -- Extension configuration -------------------------------------------------
# Ignore >>> when copying code
copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True
# Auto-generated header anchors
myst_heading_anchors = 3
# Enable "colon_fence" extension of myst.
myst_enable_extensions = ['colon_fence', 'dollarmath']
# Configuration for intersphinx
intersphinx_mapping = {
'python': ('https://docs.python.org/3', None),
'numpy': ('https://numpy.org/doc/stable', None),
'torch': ('https://pytorch.org/docs/stable/', None),
'mmengine': ('https://mmengine.readthedocs.io/en/latest/', None),
'transformers':
('https://huggingface.co/docs/transformers/main/en/', None),
}
napoleon_custom_sections = [
# Custom sections for data elements.
('Meta fields', 'params_style'),
('Data fields', 'params_style'),
]
# Disable docstring inheritance
autodoc_inherit_docstrings = False
# Mock some imports during generate API docs.
autodoc_mock_imports = ['rich', 'attr', 'einops']
# Disable displaying type annotations, these can be very verbose
autodoc_typehints = 'none'
# The not found page
notfound_template = '404.html'
# Overview
# Installation
1. Prepare Torch refer to [PyTorch](https://pytorch.org/).
Notice that OpenCompass requires `pytorch>=1.13`.
```bash
conda create --name opencompass python=3.8 -y
conda activate opencompass
conda install pytorch torchvision -c pytorch
```
2. Install OpenCompass:
```bash
git clone https://github.com/opencompass/opencompass
cd opencompass
pip install -r requirments/runtime.txt
pip install -e .
```
3. Install humaneval (option)
do this if you want to eval on humaneval dataset.
```
git clone https://github.com/openai/human-eval.git
cd human-eval
pip install -r requirments.txt
pip install -e .
```
Remember to remove the comments of Line48-57 and uncomment [line58](https://github.com/openai/human-eval/blob/312c5e5532f0e0470bf47f77a6243e02a61da530/human_eval/execution.py#L58) in the source code.
# Quick tour
Welcome to OpenCompass' documentation!
==========================================
Hands-on Roadmap of OpenCompass
-------------------------------
To help users quickly utilize OpenCompass, we recommend following the hands-on
roadmap we have created for the library:
- For users who want to use OpenCompass, we recommend reading the GetStarted_ section first to set up the environment.
- For some basic usage, we suggest users read the UserGuides_.
- If you want to customize the algorithm, we have provided the AdvancedGuides_.
- If you want to adjust the prompts, you can browse the Prompt_.
- We also offer the Tools_.
We always welcome *PRs* and *Issues* for the betterment of MMPretrain.
.. _GetStarted:
.. toctree::
:maxdepth: 1
:caption: Get Started
get_started.md
.. _UserGuides:
.. toctree::
:maxdepth: 1
:caption: UserGuides
user_guides/framework_overview.md
user_guides/config.md
user_guides/dataset_prepare.md
user_guides/models.md
user_guides/evaluation.md
user_guides/experimentation.md
user_guides/metrics.md
.. _AdvancedGuides:
.. toctree::
:maxdepth: 1
:caption: AdvancedGuides
advanced_guides/new_dataset.md
advanced_guides/new_model.md
.. _Prompt:
.. toctree::
:maxdepth: 1
:caption: Prompt
prompt/overview.md
prompt/few_shot.md
prompt/prompt_template.md
prompt/meta_template.md
.. _Tools:
.. toctree::
:maxdepth: 1
:caption: tools
tools.md
.. _Notes:
.. toctree::
:maxdepth: 1
:caption: Notes
notes/contribution_guide.md
.. toctree::
:caption: switch language
English <https://OpenCompass.readthedocs.io/en/latest/>
简体中文 <https://OpenCompass.readthedocs.io/zh_CN/latest/>
Indexes & Tables
==================
* :ref:`genindex`
* :ref:`search`
\ No newline at end of file
# Contributing to OpenCompass
- [Contributing to OpenCompass](#contributing-to-opencompass)
- [Workflow](#workflow)
- [Code style](#code-style)
- [Python](#python)
- [Pre-commit Hook](#pre-commit-hook)
Thanks for your interest in contributing to OpenCompass! All kinds of contributions are welcome, including but not limited to the following.
- Fix typo or bugs
- Add documentation or translate the documentation into other languages
- Add new features and components
## Workflow
We recommend the potential contributors follow this workflow for contribution.
1. Fork and pull the latest OpenCompass repository, follow [get started](https://OpenCompass.readthedocs.io/en/latest/get_started.html) to setup the environment.
2. Checkout a new branch (**do not use the master or dev branch** for PRs)
```bash
git checkout -b xxxx # xxxx is the name of new branch
```
3. Edit the related files follow the code style mentioned below
4. Use [pre-commit hook](https://pre-commit.com/) to check and format your changes.
5. Commit your changes
6. Create a PR with related information
## Code style
### Python
We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
We use the following tools for linting and formatting:
- [flake8](https://github.com/PyCQA/flake8): A wrapper around some linter tools.
- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports.
- [yapf](https://github.com/google/yapf): A formatter for Python files.
- [codespell](https://github.com/codespell-project/codespell): A Python utility to fix common misspellings in text files.
- [mdformat](https://github.com/executablebooks/mdformat): Mdformat is an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files.
- [docformatter](https://github.com/myint/docformatter): A formatter to format docstring.
Style configurations of yapf and isort can be found in [setup.cfg](https://github.com/open-mmlab/OpenCompass/blob/main/setup.cfg).
## Pre-commit Hook
We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`,
fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirments.txt` automatically on every commit.
The config for a pre-commit hook is stored in [.pre-commit-config](xxxxxxx).
After you clone the repository, you will need to install initialize pre-commit hook.
```shell
pip install -U pre-commit
```
From the repository folder
```shell
pre-commit install
```
After this on every commit check code linters and formatter will be enforced.
> Before you create a PR, make sure that your code lints and is formatted by yapf.
\ No newline at end of file
# In-context Learning
\ No newline at end of file
# Preparing and Selecting Datasets
This section of the tutorial mainly focuses on how to prepare the datasets supported by OpenCompass and build configuration files to complete dataset selection.
## Directory Structure of Dataset Configuration Files
First, let's introduce the structure under the `configs/datasets` directory in OpenCompass, as shown below:
```
configs/datasets/
├── ChineseUniversal # Ability dimension
│ ├── CLUE_afqmc # Dataset under this dimension
│ │ ├── CLUE_afqmc_gen_db509b.py # Different configuration files for this dataset
│ │ ├── CLUE_afqmc_gen.py
│ │ ├── CLUE_afqmc_ppl_00b348.py
│ │ ├── CLUE_afqmc_ppl_2313cf.py
│ │ └── CLUE_afqmc_ppl.py
│ ├── CLUE_C3
│ │ ├── ...
│ ├── ...
├── Coding
├── collections
├── Completion
├── EnglishUniversal
├── Exam
├── glm
├── LongText
├── MISC
├── NLG
├── QA
├── Reasoning
├── Security
└── Translation
```
In the `configs/datasets` directory structure, we have divided the datasets into over ten dimensions based on ability dimensions, such as: Chinese and English Universal, Exam, QA, Reasoning, Security, etc. Each dimension contains a series of datasets, and there are multiple dataset configurations in the corresponding folder of each dataset.
The naming of the dataset configuration file is made up of `{dataset name}_{evaluation method}_{prompt version number}.py`. For example, `ChineseUniversal/CLUE_afqmc/CLUE_afqmc_gen_db509b.py`, this configuration file is the `CLUE_afqmc` dataset under the Chinese universal ability, the corresponding evaluation method is `gen`, i.e., generative evaluation, and the corresponding prompt version number is `db509b`; similarly, `CLUE_afqmc_ppl_00b348.py` indicates that the evaluation method is `ppl`, i.e., discriminative evaluation, and the prompt version number is `00b348`.
In addition, files without a version number, such as: `CLUE_afqmc_gen.py`, point to the latest prompt configuration file of that evaluation method, which is usually the most accurate prompt.
## Dataset Preparation
The datasets supported by OpenCompass mainly include two parts:
1. Huggingface Dataset
[Huggingface Dataset](https://huggingface.co/datasets) provides a large number of datasets. OpenCompass has supported most of the datasets commonly used for performance comparison, please refer to `configs/dataset` for the specific list of supported datasets.
2. OpenCompass Self-built Datasets
In addition to supporting Huggingface's existing datasets, OpenCompass also provides some self-built CN datasets. In the future, a dataset-related Repo will be provided for users to download and use. Following the instructions in the document to place the datasets uniformly in the `./data` directory can complete dataset preparation.
It is important to note that the Repo not only contains self-built datasets, but also includes some HF-supported datasets for testing convenience.
## Dataset Selection
In each dataset configuration file, the dataset will be defined in the `{}_datasets` variable, such as `afqmc_datasets` in `ChineseUniversal/CLUE_afqmc/CLUE_afqmc_gen_db509b.py`.
```python
afqmc_datasets = [
dict(
abbr="afqmc-dev",
type=AFQMCDataset_V2,
path="./data/CLUE/AFQMC/dev.json",
reader_cfg=afqmc_reader_cfg,
infer_cfg=afqmc_infer_cfg,
eval_cfg=afqmc_eval_cfg,
),
]
```
And `afqmc_datasets` in `ChineseUniversal/CLUE_cmnli/CLUE_cmnli_ppl_b78ad4.py`.
```python
cmnli_datasets = [
dict(
type=HFDataset,
abbr='cmnli',
path='json',
split='train',
data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg)
]
```
Take these two datasets as examples. If users want to evaluate these two datasets at the same time, they can create a new configuration file in the `configs` directory. We use the import mechanism in the `mmengine` configuration to build the part of the dataset parameters in the evaluation script, as shown below:
```python
from mmengine.config import read_base
with read_base():
from .datasets.CLUE_afqmc.CLUE_afqmc_gen_db509b import afqmc_datasets
from .datasets.CLUE_cmnli.CLUE_cmnli_ppl_b78ad4 import cmnli_datasets
datasets = []
datasets += afqmc_datasets
datasets += cmnli_datasets
```
Users can choose different abilities, different datasets and different evaluation methods configuration files to build the part of the dataset in the evaluation script according to their needs.
For information on how to start an evaluation task and how to evaluate self-built datasets, please refer to the relevant documents.
# 支持新数据集
尽管 OpenCompass 已经包含了大多数常用数据集,用户在支持新数据集的时候需要完成以下几个步骤:
1.`opencompass/datasets` 文件夹新增数据集脚本 `mydataset.py`, 该脚本需要包含:
- 数据集及其加载方式,需要定义一个 `MyDataset` 类,实现数据集加载方法 `load` ,该方法为静态方法,需要返回 `datasets.Dataset` 类型的数据。这里我们使用 huggingface dataset 作为数据集的统一接口,避免引入额外的逻辑。具体示例如下:
```python
import datasets
from .base import BaseDataset
class MyDataset(BaseDataset):
@staticmethod
def load(**kwargs) -> datasets.Dataset:
pass
```
- (可选)如果OpenCompass已有的evaluator不能满足需要,需要用户定义 `MyDatasetlEvaluator` 类,实现评分方法 `score` ,需要根据输入的 `predictions` 和 `references` 列表,得到需要的字典。由于一个数据集可能存在多种metric,需要返回一个 metrics 以及对应 scores 的相关字典。具体示例如下:
```python
from opencompass.openicl.icl_evaluator import BaseEvaluator
class MyDatasetlEvaluator(BaseEvaluator):
def score(self, predictions: List, references: List) -> dict:
pass
```
- (可选)如果 OpenCompass 已有的 postprocesser 不能满足需要,需要用户定义 `mydataset_postprocess` 方法,根据输入的字符串得到相应后处理的结果。具体示例如下:
```python
def mydataset_postprocess(text: str) -> str:
pass
```
2. 在定义好数据集加载,数据后处理以及 `evaluator` 等方法之后,需要在配置文件中新增以下配置:
```python
from opencompass.datasets import MyDataset, MyDatasetlEvaluator, mydataset_postprocess
mydataset_eval_cfg = dict(
evaluator=dict(type=MyDatasetlEvaluator),
pred_postprocessor=dict(type=mydataset_postprocess))
mydataset_datasets = [
dict(
type=MyDataset,
...,
reader_cfg=...,
infer_cfg=...,
eval_cfg=mydataset_eval_cfg)
]
```
配置好数据集之后,其他需要的配置文件直接参考如何启动评测任务教程即可。
\ No newline at end of file
# 支持新模型
目前我们已经支持的模型有 HF 模型、部分模型 API 、自建模型和部分第三方模型。
## 新增API模型
新增基于API的模型,需要在 `opencompass/models` 下新建 `mymodel_api.py` 文件,继承 `BaseAPIModel`,并实现 `generate` 方法来进行推理,以及 `get_token_len` 方法来计算 token 的长度。在定义好之后修改对应配置文件名称即可。
```python
from ..base_api import BaseAPIModel
class MyModelAPI(BaseAPIModel):
is_api: bool = True
def __init__(self,
path: str,
max_seq_len: int = 2048,
query_per_second: int = 1,
retry: int = 2,
**kwargs):
super().__init__(path=path,
max_seq_len=max_seq_len,
meta_template=meta_template,
query_per_second=query_per_second,
retry=retry)
...
def generate(
self,
inputs,
max_out_len: int = 512,
temperature: float = 0.7,
) -> List[str]:
"""Generate results given a list of inputs."""
pass
def get_token_len(self, prompt: str) -> int:
"""Get lengths of the tokenized string."""
pass
```
## 新增第三方模型
新增基于API的模型,需要在 `opencompass/models` 下新建 `mymodel.py` 文件,继承 `BaseModel`,并实现 `generate` 方法来进行生成式推理, `get_ppl` 方法来进行判别式推理,以及 `get_token_len` 方法来计算 token 的长度。在定义好之后修改对应配置文件名称即可。
```python
from ..base import BaseModel
class MyModel(BaseModel):
def __init__(self,
pkg_root: str,
ckpt_path: str,
tokenizer_only: bool = False,
meta_template: Optional[Dict] = None,
**kwargs):
...
def get_token_len(self, prompt: str) -> int:
"""Get lengths of the tokenized strings."""
pass
def generate(self, inputs: List[str], max_out_len: int) -> List[str]:
"""Generate results given a list of inputs. """
pass
def get_ppl(self,
inputs: List[str],
mask_length: Optional[List[int]] = None) -> List[float]:
"""Get perplexity scores given a list of inputs."""
pass
```
# flake8: noqa
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import subprocess
import sys
import pytorch_sphinx_theme
from sphinx.builders.html import StandaloneHTMLBuilder
sys.path.insert(0, os.path.abspath('../../'))
# -- Project information -----------------------------------------------------
project = 'OpenCompass'
copyright = '2023, OpenCompass'
author = 'OpenCompass Authors'
# The full version, including alpha/beta/rc tags
# version_file = '../../opencompass/version.py'
# def get_version():
# with open(version_file, 'r') as f:
# exec(compile(f.read(), version_file, 'exec'))
# return locals()['__version__']
release = "0.5.0"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.intersphinx',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'myst_parser',
'sphinx_copybutton',
'sphinx_tabs.tabs',
'notfound.extension',
'sphinxcontrib.jquery',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = {
'.rst': 'restructuredtext',
'.md': 'markdown',
}
language = 'en'
# The master toctree document.
root_doc = 'index'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'pytorch_sphinx_theme'
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# yapf: disable
html_theme_options = {
'menu': [
{
'name': 'GitHub',
'url': 'https://github.com/open-mmlab/opencompass'
},
],
# Specify the language of shared menu
'menu_lang': 'en',
# Disable the default edit on GitHub
'default_edit_on_github': False,
}
# yapf: enable
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = [
'https://cdn.datatables.net/v/bs4/dt-1.12.1/datatables.min.css',
'css/readthedocs.css'
]
html_js_files = [
'https://cdn.datatables.net/v/bs4/dt-1.12.1/datatables.min.js',
'js/custom.js'
]
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'opencompassdoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(root_doc, 'opencompass.tex', 'OpenCompass Documentation', author, 'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author], 1)]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(root_doc, 'opencompass', 'OpenCompass Documentation', author, 'OpenCompass Authors',
'AGI evaluation toolbox and benchmark.', 'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# set priority when building html
StandaloneHTMLBuilder.supported_image_types = [
'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg'
]
# -- Extension configuration -------------------------------------------------
# Ignore >>> when copying code
copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True
# Auto-generated header anchors
myst_heading_anchors = 3
# Enable "colon_fence" extension of myst.
myst_enable_extensions = ['colon_fence', 'dollarmath']
# Configuration for intersphinx
intersphinx_mapping = {
'python': ('https://docs.python.org/3', None),
'numpy': ('https://numpy.org/doc/stable', None),
'torch': ('https://pytorch.org/docs/stable/', None),
'mmengine': ('https://mmengine.readthedocs.io/en/latest/', None),
'transformers':
('https://huggingface.co/docs/transformers/main/en/', None),
}
napoleon_custom_sections = [
# Custom sections for data elements.
('Meta fields', 'params_style'),
('Data fields', 'params_style'),
]
# Disable docstring inheritance
autodoc_inherit_docstrings = False
# Mock some imports during generate API docs.
autodoc_mock_imports = ['rich', 'attr', 'einops']
# Disable displaying type annotations, these can be very verbose
autodoc_typehints = 'none'
# The not found page
notfound_template = '404.html'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment