initial commit

7d346000 · gaotongxiao · 7d346000 · 7d346000 · 7d346000 · 7d346000
Commit 7d346000 authored Jul 04, 2023 by gaotongxiao
20 changed files
--- a/configs/datasets/summedits/summedits_ppl_c4d270.py
+++ b/configs/datasets/summedits/summedits_ppl_c4d270.py
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import HFDataset
+summedits_reader_cfg = dict(  # reader settings: prompt input columns and the reference column
+    input_columns=['doc', 'summary'],
+    output_column='label',
+    test_split='train')  # the 'train' split is evaluated; the dataset entry below also loads split='train'
+summedits_prompt = """
+Given the document below, you have to determine if "Yes" or "No", the summary is factually consistent with the document.
+Document:
+{doc}
+Summary:
+{summary}
+Is the summary factually consistent with the document?
+"""
+summedits_infer_cfg = dict(  # zero-shot (ZeroRetriever) inference with PPLInferencer
+    prompt_template=dict(
+        type=PromptTemplate,
+        template={  # one candidate prompt per key; presumably keyed by the 'label' value (0 -> No, 1 -> Yes) - TODO confirm
+            0: f"{summedits_prompt}Answer: No.",
+            1: f"{summedits_prompt}Answer: Yes."
+        }),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=PPLInferencer))
+summedits_eval_cfg = dict(evaluator=dict(type=AccEvaluator))  # scored with AccEvaluator
+summedits_datasets = [  # list exported for inclusion in an evaluation config
+    dict(
+        type=HFDataset,
+        abbr='summedits',
+        path='json',
+        split='train',
+        data_files='./data/summedits/summedits.jsonl',  # local JSONL file, relative to the working directory
+        reader_cfg=summedits_reader_cfg,
+        infer_cfg=summedits_infer_cfg,
+        eval_cfg=summedits_eval_cfg)
+]
--- a/configs/datasets/summscreen/summscreen_gen_e88eaa.py
+++ b/configs/datasets/summscreen/summscreen_gen_e88eaa.py
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import BleuEvaluator
+from opencompass.datasets import SummScreenDataset
+summscreen_reader_cfg = dict(  # reader settings: 'content' is the input, 'summary' the reference
+    input_columns='content',  # NOTE(review): a bare string here, whereas sibling configs pass a list - confirm both are accepted
+    output_column='summary',
+    train_split='dev',  # both train and test read the 'dev' split
+    test_split='dev')
+summscreen_infer_cfg = dict(  # zero-shot generation with a dialogue-style template
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(
+            begin=[  # instruction placed before the round; uses role SYSTEM with fallback_role HUMAN
+                dict(
+                    role='SYSTEM',
+                    fallback_role="HUMAN",
+                    prompt=
+                    'Please summarize the following English play script in English:'
+                ),
+            ],
+            round=[  # one HUMAN turn carrying the script, one BOT turn carrying the summary
+                dict(role='HUMAN', prompt='{content}'),
+                dict(role='BOT', prompt='{summary}'),
+            ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(
+        type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))
+summscreen_eval_cfg = dict(  # BLEU scoring on the BOT-role prediction
+    evaluator=dict(type=BleuEvaluator),
+    pred_role='BOT',
+    pred_postprocessor=dict(type='general_cn'),  # NOTE(review): 'general_cn' is applied although the prompt asks for English output - confirm intended
+    dataset_postprocessor=dict(type='general_cn'))
+summscreen_datasets = [  # list exported for inclusion in an evaluation config
+    dict(
+        type=SummScreenDataset,
+        path='./data/SummScreen/',  # local data directory, relative to the working directory
+        abbr='SummScreen',
+        reader_cfg=summscreen_reader_cfg,
+        infer_cfg=summscreen_infer_cfg,
+        eval_cfg=summscreen_eval_cfg)
+]
--- a/configs/datasets/truthfulqa/truthfulqa_gen_d8faf6.py
+++ b/configs/datasets/truthfulqa/truthfulqa_gen_d8faf6.py
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import TruthfulQADataset, TruthfulQAEvaluator
+truthfulqa_reader_cfg = dict(  # reader settings: 'question' is the input, 'reference' the reference column
+    input_columns=['question'],
+    output_column='reference',
+    train_split='validation',  # both train and test read the 'validation' split
+    test_split='validation')
+# TODO: allow empty output-column
+truthfulqa_infer_cfg = dict(  # zero-shot generation; the prompt is the raw question
+    prompt_template=dict(
+        type=PromptTemplate,
+        template='{question}'),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer))
+# Metrics such as 'truth' and 'info' need an
+# OPENAI_API_KEY that has access to fine-tuned models.
+# Please use your own fine-tuned OpenAI models with your keys, and refer to
+# the source code for more details.
+# Metrics such as 'bleurt', 'rouge', 'bleu' are free to test.
+# When key is set to "ENV", the key will be fetched from the environment
+# variable $OPENAI_API_KEY. Otherwise, set the key here directly.
+truthfulqa_eval_cfg = dict(
+    evaluator=dict(
+        type=TruthfulQAEvaluator, metrics=('truth', 'info'), key='ENV'), )  # only the API-backed metrics are enabled; key comes from the environment
+truthfulqa_datasets = [  # list exported for inclusion in an evaluation config
+    dict(
+        type=TruthfulQADataset,
+        path='truthful_qa',
+        name='generation',
+        reader_cfg=truthfulqa_reader_cfg,
+        infer_cfg=truthfulqa_infer_cfg,
+        eval_cfg=truthfulqa_eval_cfg)
+]
--- a/configs/datasets/winograd/winograd_ppl_c1c427.py
+++ b/configs/datasets/winograd/winograd_ppl_c1c427.py
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import winogradDataset
+winograd_reader_cfg = dict(  # reader settings: four prompt columns plus the 'label' reference column
+    input_columns=['prompt', 'pronoun', 'opt1', 'opt2'],
+    output_column='label',
+    train_split='test',  # both train and test read the 'test' split
+    test_split='test')
+winograd_infer_cfg = dict(  # zero-shot (ZeroRetriever) inference with PPLInferencer
+    prompt_template=dict(
+        type=PromptTemplate,
+        template={  # dict comprehension: builds one template for each i in {0, 1}
+            i: dict(round=[
+                dict(
+                    role="HUMAN",
+                    prompt=
+                    f"{{prompt}} Q: In the previous text, what does '{{pronoun}}' refer to? A: {{opt{i+1}}}"
+                ),  # noqa - the f-string leaves literal {prompt}/{pronoun} placeholders and yields {opt1} for i=0, {opt2} for i=1
+            ])
+            for i in range(2)
+        }),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=PPLInferencer))
+winograd_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )  # scored with AccEvaluator
+winograd_datasets = [  # list exported for inclusion in an evaluation config
+    dict(
+        abbr='winograd',
+        type=winogradDataset,
+        path='winograd_wsc',
+        name='wsc285',
+        reader_cfg=winograd_reader_cfg,
+        infer_cfg=winograd_infer_cfg,
+        eval_cfg=winograd_eval_cfg)
+]
--- a/configs/datasets/winogrande/winogrande_gen_c19d87.py
+++ b/configs/datasets/winogrande/winogrande_gen_c19d87.py
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import winograndeDataset_V2
+winogrande_reader_cfg = dict(  # reader settings: two candidate sentences in, 'label' as reference
+    input_columns=["opt1", "opt2"],
+    output_column="label",
+    test_split="validation")  # evaluation reads the 'validation' split
+winogrande_infer_cfg = dict(  # zero-shot generation: the model is asked to pick option A or B
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role="HUMAN",
+                prompt=
+                "Which of the following is a good sentence:\nA. {opt1}\nB. {opt2}\nAnswer:"
+            ),
+        ]),
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer),
+)
+winogrande_eval_cfg = dict(  # accuracy on the post-processed BOT-role prediction
+    evaluator=dict(type=AccEvaluator),
+    pred_role="BOT",
+    pred_postprocessor=dict(type="first-capital"),  # presumably keeps the first capital letter of the output (A/B) - TODO confirm against the post-processor
+)
+winogrande_datasets = [  # list exported for inclusion in an evaluation config
+    dict(
+        abbr="winogrande",
+        type=winograndeDataset_V2,
+        path="winogrande",
+        name="winogrande_xs",
+        reader_cfg=winogrande_reader_cfg,
+        infer_cfg=winogrande_infer_cfg,
+        eval_cfg=winogrande_eval_cfg,
+    )
+]
--- a/configs/datasets/z_bench/z_bench_gen_5813ec.py
+++ b/configs/datasets/z_bench/z_bench_gen_5813ec.py
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HFDataset
+z_bench_reader_cfg = dict(  # reader settings: 'text' is the input, 'category' the output column
+    input_columns=['text'], output_column='category', train_split='test')  # only train_split is set here; no test_split is given
+z_bench_infer_cfg = dict(  # zero-shot generation; the prompt is the raw text
+    prompt_template=dict(
+        type=PromptTemplate,
+        template='{text}',
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer))
+z_bench_dataset = dict(  # NOTE(review): a single dict named *_dataset, unlike the *_datasets lists in sibling configs; no eval_cfg is attached
+    type=HFDataset,
+    path=
+    '/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench',
+    data_dir=
+    '/mnt/petrelfs/gaotong/llm_eval/openagieval_dataset/eval_datasets/z_bench',
+    name='question',  # NOTE(review): path and data_dir above are absolute, user-specific locations - they will not resolve on other machines
+    reader_cfg=z_bench_reader_cfg,
+    infer_cfg=z_bench_infer_cfg)
--- a/configs/models/classic/gpt-3.5-turbo.py
+++ b/configs/models/classic/gpt-3.5-turbo.py
+from opencompass.models import OpenAI
+models = [  # model list exported for inclusion in an evaluation config
+    dict(abbr='GPT-3.5-turbo',
+        type=OpenAI, path='gpt-3.5-turbo', key='sk-xxx',  # 'sk-xxx' is a placeholder - supply a real API key, and do not commit it
+        max_out_len=2048, max_seq_len=2048, batch_size=1)
+]
--- a/configs/summarizers/example.py
+++ b/configs/summarizers/example.py
+from mmengine.config import read_base
+with read_base():  # pull the per-benchmark *_summary_groups lists into this config's namespace
+    from .groups.agieval import agieval_summary_groups
+    from .groups.mmlu import mmlu_summary_groups
+    from .groups.ceval import ceval_summary_groups
+    from .groups.bbh import bbh_summary_groups
+    from .groups.GaokaoBench import GaokaoBench_summary_groups
+    from .groups.flores import flores_summary_groups
+summarizer = dict(
+    summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []),  # concatenate every variable whose name ends with "_summary_groups" into one flat list
+    prompt_db=dict(
+        database_path='configs/datasets/log.json',
+        config_dir='configs/datasets',
+        blacklist='.promptignore')
+)
--- a/configs/summarizers/groups/GaokaoBench.py
+++ b/configs/summarizers/groups/GaokaoBench.py
+GaokaoBench_summary_groups = []  # list of summary-group dicts exported by this module
+# gaokao-bench: per-subset weights for the aggregate 'GaokaoBench' group (presumably score totals or question counts - TODO confirm)
+_GaokaoBench_weights = {'2010-2022_Math_II_MCQs': 1090, '2010-2022_Math_I_MCQs': 1070, '2010-2022_History_MCQs': 1148, '2010-2022_Biology_MCQs': 900, '2010-2022_Political_Science_MCQs': 1280, '2010-2022_Physics_MCQs': 384, '2010-2022_Chemistry_MCQs': 744, '2010-2013_English_MCQs': 105, '2010-2022_Chinese_Modern_Lit': 261, '2010-2022_English_Fill_in_Blanks': 900.0, '2012-2022_English_Cloze_Test': 260, '2010-2022_Geography_MCQs': 380, '2010-2022_English_Reading_Comp': 940, '2010-2022_Chinese_Lang_and_Usage_MCQs': 240}
+GaokaoBench_summary_groups.append({'name': 'GaokaoBench', 'subsets': list(_GaokaoBench_weights.keys()), 'weights': _GaokaoBench_weights})  # subsets are exactly the keys of the weight table
--- a/docs/en/_static/js/custom.js
+++ b/docs/en/_static/js/custom.js
+var collapsedSections = ['Advanced Guides', 'Tools', 'User Guides', 'Notes'];  // not used in this file; presumably read by the docs theme to collapse these sidebar sections - TODO confirm the titles match the toctree captions
+$(document).ready(function () {  // once the DOM is ready, initialise DataTables on every .model-summary element
+  $('.model-summary').DataTable({
+    "stateSave": false,
+    "lengthChange": false,
+    "pageLength": 20,
+    "order": []
+  });
+});
--- a/docs/en/conf.py
+++ b/docs/en/conf.py
+# flake8: noqa
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+# -- Path setup --------------------------------------------------------------
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import subprocess
+import sys
+import pytorch_sphinx_theme
+from sphinx.builders.html import StandaloneHTMLBuilder
+sys.path.insert(0, os.path.abspath('../../'))
+# -- Project information -----------------------------------------------------
+project = 'OpenCompass'
+copyright = '2023, OpenCompass'
+author = 'OpenCompass Authors'
+# The full version, including alpha/beta/rc tags
+# version_file = '../../opencompass/version.py'
+# def get_version():
+#     with open(version_file, 'r') as f:
+#         exec(compile(f.read(), version_file, 'exec'))
+#     return locals()['__version__']
+release = '1.0.0'  # hard-coded while get_version() above is disabled; NOTE(review): docs/zh_cn/conf.py sets "0.5.0" - confirm which is correct
+# -- General configuration ---------------------------------------------------
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.napoleon',
+    'sphinx.ext.viewcode',
+    'myst_parser',
+    'sphinx_copybutton',
+    'sphinx_tabs.tabs',
+    'notfound.extension',
+    'sphinxcontrib.jquery',
+]
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+source_suffix = {
+    '.rst': 'restructuredtext',
+    '.md': 'markdown',
+}
+language = 'en'
+# The master toctree document.
+root_doc = 'index'
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+# -- Options for HTML output -------------------------------------------------
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'pytorch_sphinx_theme'
+html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+# yapf: disable
+html_theme_options = {
+    'menu': [
+        {
+            'name': 'GitHub',
+            'url': 'https://github.com/opencompass'
+        },
+    ],
+    # Specify the language of shared menu
+    'menu_lang': 'en',
+    # Disable the default edit on GitHub
+    'default_edit_on_github': False,
+}
+# yapf: enable
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+html_css_files = [
+    'https://cdn.datatables.net/v/bs4/dt-1.12.1/datatables.min.css',
+    'css/readthedocs.css'
+]
+html_js_files = [
+    'https://cdn.datatables.net/v/bs4/dt-1.12.1/datatables.min.js',
+    'js/custom.js'
+]
+# -- Options for HTMLHelp output ---------------------------------------------
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'opencompassdoc'
+# -- Options for LaTeX output ------------------------------------------------
+latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+}
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (root_doc, 'opencompass.tex', 'OpenCompass Documentation', author,
+     'manual'),
+]
+# -- Options for manual page output ------------------------------------------
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author],
+              1)]
+# -- Options for Texinfo output ----------------------------------------------
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (root_doc, 'opencompass', 'OpenCompass Documentation', author,
+     'OpenCompass Authors', 'AGI evaluation toolbox and benchmark.',
+     'Miscellaneous'),
+]
+# -- Options for Epub output -------------------------------------------------
+# Bibliographic Dublin Core info.
+epub_title = project
+# The unique identifier of the text. This can be a ISBN number
+# or the project homepage.
+#
+# epub_identifier = ''
+# A unique identification for the text.
+#
+# epub_uid = ''
+# A list of files that should not be packed into the epub file.
+epub_exclude_files = ['search.html']
+# set priority when building html
+StandaloneHTMLBuilder.supported_image_types = [
+    'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg'
+]
+# -- Extension configuration -------------------------------------------------
+# Ignore >>> when copying code
+copybutton_prompt_text = r'>>> |\.\.\. '
+copybutton_prompt_is_regexp = True
+# Auto-generated header anchors
+myst_heading_anchors = 3
+# Enable "colon_fence" extension of myst.
+myst_enable_extensions = ['colon_fence', 'dollarmath']
+# Configuration for intersphinx
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/3', None),
+    'numpy': ('https://numpy.org/doc/stable', None),
+    'torch': ('https://pytorch.org/docs/stable/', None),
+    'mmengine': ('https://mmengine.readthedocs.io/en/latest/', None),
+    'transformers':
+    ('https://huggingface.co/docs/transformers/main/en/', None),
+}
+napoleon_custom_sections = [
+    # Custom sections for data elements.
+    ('Meta fields', 'params_style'),
+    ('Data fields', 'params_style'),
+]
+# Disable docstring inheritance
+autodoc_inherit_docstrings = False
+# Mock some imports during generate API docs.
+autodoc_mock_imports = ['rich', 'attr', 'einops']
+# Disable displaying type annotations, these can be very verbose
+autodoc_typehints = 'none'
+# The not found page
+notfound_template = '404.html'
--- a/docs/en/get_started.md
+++ b/docs/en/get_started.md
+# Overview
+# Installation
+1. Prepare PyTorch by following the instructions at [PyTorch](https://pytorch.org/).
+Note that OpenCompass requires `pytorch>=1.13`.
+```bash
+conda create --name opencompass python=3.8 -y
+conda activate opencompass
+conda install pytorch torchvision -c pytorch
+```
+2. Install OpenCompass:
+```bash
+git clone https://github.com/opencompass/opencompass
+cd opencompass
+pip install -r requirements/runtime.txt
+pip install -e .
+```
+3. Install humaneval (optional)
+Do this if you want to evaluate on the humaneval dataset.
+```
+git clone https://github.com/openai/human-eval.git
+cd human-eval
+pip install -r requirements.txt
+pip install -e .
+```
+Remember to remove the comments on lines 48-57 and uncomment [line 58](https://github.com/openai/human-eval/blob/312c5e5532f0e0470bf47f77a6243e02a61da530/human_eval/execution.py#L58) in the human-eval source code (`human_eval/execution.py`).
+# Quick tour
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
+Welcome to OpenCompass' documentation!
+==========================================
+Hands-on Roadmap of OpenCompass
+-------------------------------
+To help users quickly utilize OpenCompass, we recommend following the hands-on
+roadmap we have created for the library:
+    - For users who want to use OpenCompass, we recommend reading the GetStarted_ section first to set up the environment.
+    - For some basic usage, we suggest users read the UserGuides_.
+    - If you want to customize the algorithm, we have provided the AdvancedGuides_.
+    - If you want to adjust the prompts, you can browse the Prompt_.
+    - We also offer the Tools_.
+We always welcome *PRs* and *Issues* for the betterment of OpenCompass.
+.. _GetStarted:
+.. toctree::
+   :maxdepth: 1
+   :caption: Get Started
+   get_started.md
+.. _UserGuides:
+.. toctree::
+   :maxdepth: 1
+   :caption: User Guides
+   user_guides/framework_overview.md
+   user_guides/config.md
+   user_guides/dataset_prepare.md
+   user_guides/models.md
+   user_guides/evaluation.md
+   user_guides/experimentation.md
+   user_guides/metrics.md
+.. _AdvancedGuides:
+.. toctree::
+   :maxdepth: 1
+   :caption: Advanced Guides
+   advanced_guides/new_dataset.md
+   advanced_guides/new_model.md
+.. _Prompt:
+.. toctree::
+   :maxdepth: 1
+   :caption: Prompt
+   prompt/overview.md
+   prompt/few_shot.md
+   prompt/prompt_template.md
+   prompt/meta_template.md
+.. _Tools:
+.. toctree::
+   :maxdepth: 1
+   :caption: Tools
+   tools.md
+.. _Notes:
+.. toctree::
+   :maxdepth: 1
+   :caption: Notes
+   notes/contribution_guide.md
+.. toctree::
+   :caption: switch language
+   English <https://OpenCompass.readthedocs.io/en/latest/>
+   简体中文 <https://OpenCompass.readthedocs.io/zh_CN/latest/>
+Indexes & Tables
+==================
+* :ref:`genindex`
+* :ref:`search`
\ No newline at end of file
--- a/docs/en/notes/contribution_guide.md
+++ b/docs/en/notes/contribution_guide.md
+# Contributing to OpenCompass
+- [Contributing to OpenCompass](#contributing-to-opencompass)
+  - [Workflow](#workflow)
+  - [Code style](#code-style)
+    - [Python](#python)
+  - [Pre-commit Hook](#pre-commit-hook)
+Thanks for your interest in contributing to OpenCompass! All kinds of contributions are welcome, including but not limited to the following.
+- Fix typos or bugs
+- Add documentation or translate the documentation into other languages
+- Add new features and components
+## Workflow
+We recommend the potential contributors follow this workflow for contribution.
+1. Fork and pull the latest OpenCompass repository, then follow [get started](https://OpenCompass.readthedocs.io/en/latest/get_started.html) to set up the environment.
+2. Checkout a new branch (**do not use the master or dev branch** for PRs)
+```bash
+git checkout -b xxxx # xxxx is the name of new branch
+```
+3. Edit the related files, following the code style mentioned below
+4. Use [pre-commit hook](https://pre-commit.com/) to check and format your changes.
+5. Commit your changes
+6. Create a PR with related information
+## Code style
+### Python
+We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
+We use the following tools for linting and formatting:
+- [flake8](https://github.com/PyCQA/flake8): A wrapper around some linter tools.
+- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports.
+- [yapf](https://github.com/google/yapf): A formatter for Python files.
+- [codespell](https://github.com/codespell-project/codespell): A Python utility to fix common misspellings in text files.
+- [mdformat](https://github.com/executablebooks/mdformat): Mdformat is an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files.
+- [docformatter](https://github.com/myint/docformatter): A formatter to format docstring.
+Style configurations of yapf and isort can be found in [setup.cfg](https://github.com/open-mmlab/OpenCompass/blob/main/setup.cfg).
+## Pre-commit Hook
+We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`,
+fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, and sorts `requirements.txt` automatically on every commit.
+The config for a pre-commit hook is stored in [.pre-commit-config](xxxxxxx).
+After you clone the repository, you will need to install and initialize the pre-commit hook.
+```shell
+pip install -U pre-commit
+```
+From the repository folder
+```shell
+pre-commit install
+```
+After this, the code linters and formatters will be enforced on every commit.
+> Before you create a PR, make sure that your code lints and is formatted by yapf.
\ No newline at end of file
--- a/docs/en/prompt/few_shot.md
+++ b/docs/en/prompt/few_shot.md
+# In-context Learning
\ No newline at end of file
--- a/docs/en/user_guides/dataset_prepare.md
+++ b/docs/en/user_guides/dataset_prepare.md
+# Preparing and Selecting Datasets
+This section of the tutorial mainly focuses on how to prepare the datasets supported by OpenCompass and build configuration files to complete dataset selection.
+## Directory Structure of Dataset Configuration Files
+First, let's introduce the structure under the `configs/datasets` directory in OpenCompass, as shown below:
+```
+configs/datasets/
+├── ChineseUniversal  # Ability dimension
+│   ├── CLUE_afqmc  # Dataset under this dimension
+│   │   ├── CLUE_afqmc_gen_db509b.py  # Different configuration files for this dataset
+│   │   ├── CLUE_afqmc_gen.py
+│   │   ├── CLUE_afqmc_ppl_00b348.py
+│   │   ├── CLUE_afqmc_ppl_2313cf.py
+│   │   └── CLUE_afqmc_ppl.py
+│   ├── CLUE_C3
+│   │   ├── ...
+│   ├── ...
+├── Coding
+├── collections
+├── Completion
+├── EnglishUniversal
+├── Exam
+├── glm
+├── LongText
+├── MISC
+├── NLG
+├── QA
+├── Reasoning
+├── Security
+└── Translation
+```
+In the `configs/datasets` directory structure, we have divided the datasets into over ten dimensions based on ability dimensions, such as: Chinese and English Universal, Exam, QA, Reasoning, Security, etc. Each dimension contains a series of datasets, and there are multiple dataset configurations in the corresponding folder of each dataset.
+The naming of the dataset configuration file is made up of `{dataset name}_{evaluation method}_{prompt version number}.py`. For example, `ChineseUniversal/CLUE_afqmc/CLUE_afqmc_gen_db509b.py`, this configuration file is the `CLUE_afqmc` dataset under the Chinese universal ability, the corresponding evaluation method is `gen`, i.e., generative evaluation, and the corresponding prompt version number is `db509b`; similarly, `CLUE_afqmc_ppl_00b348.py` indicates that the evaluation method is `ppl`, i.e., discriminative evaluation, and the prompt version number is `00b348`.
+In addition, files without a version number, such as: `CLUE_afqmc_gen.py`, point to the latest prompt configuration file of that evaluation method, which is usually the most accurate prompt.
+## Dataset Preparation
+The datasets supported by OpenCompass mainly include two parts:
+1. Huggingface Dataset
+[Huggingface Dataset](https://huggingface.co/datasets) provides a large number of datasets. OpenCompass supports most of the datasets commonly used for performance comparison; please refer to `configs/datasets` for the specific list of supported datasets.
+2. OpenCompass Self-built Datasets
+In addition to supporting Huggingface's existing datasets, OpenCompass also provides some self-built CN datasets. In the future, a dataset-related Repo will be provided for users to download and use. Following the instructions in the document to place the datasets uniformly in the `./data` directory can complete dataset preparation.
+It is important to note that the Repo not only contains self-built datasets, but also includes some HF-supported datasets for testing convenience.
+## Dataset Selection
+In each dataset configuration file, the dataset will be defined in the `{}_datasets` variable, such as `afqmc_datasets` in `ChineseUniversal/CLUE_afqmc/CLUE_afqmc_gen_db509b.py`.
+```python
+afqmc_datasets = [
+    dict(
+        abbr="afqmc-dev",
+        type=AFQMCDataset_V2,
+        path="./data/CLUE/AFQMC/dev.json",
+        reader_cfg=afqmc_reader_cfg,
+        infer_cfg=afqmc_infer_cfg,
+        eval_cfg=afqmc_eval_cfg,
+    ),
+]
+```
+And `cmnli_datasets` in `ChineseUniversal/CLUE_cmnli/CLUE_cmnli_ppl_b78ad4.py`.
+```python
+cmnli_datasets = [
+    dict(
+        type=HFDataset,
+        abbr='cmnli',
+        path='json',
+        split='train',
+        data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
+        reader_cfg=cmnli_reader_cfg,
+        infer_cfg=cmnli_infer_cfg,
+        eval_cfg=cmnli_eval_cfg)
+]
+```
+Take these two datasets as examples. If users want to evaluate these two datasets at the same time, they can create a new configuration file in the `configs` directory. We use the import mechanism in the `mmengine` configuration to build the part of the dataset parameters in the evaluation script, as shown below:
+```python
+from mmengine.config import read_base
+with read_base():
+    from .datasets.CLUE_afqmc.CLUE_afqmc_gen_db509b import afqmc_datasets
+    from .datasets.CLUE_cmnli.CLUE_cmnli_ppl_b78ad4 import cmnli_datasets
+datasets = []
+datasets += afqmc_datasets
+datasets += cmnli_datasets
+```
+Users can choose different abilities, different datasets and different evaluation methods configuration files to build the part of the dataset in the evaluation script according to their needs.
+For information on how to start an evaluation task and how to evaluate self-built datasets, please refer to the relevant documents.
--- a/docs/en/user_guides/evaluation.md
+++ b/docs/en/user_guides/evaluation.md
+# Evaluation Strategy
--- a/docs/zh_cn/advanced_guides/new_dataset.md
+++ b/docs/zh_cn/advanced_guides/new_dataset.md
+# 支持新数据集
+尽管 OpenCompass 已经包含了大多数常用数据集，用户在支持新数据集的时候需要完成以下几个步骤：
+1. 在 `opencompass/datasets` 文件夹新增数据集脚本 `mydataset.py`, 该脚本需要包含：
+    - 数据集及其加载方式，需要定义一个 `MyDataset` 类，实现数据集加载方法 `load` ，该方法为静态方法，需要返回 `datasets.Dataset` 类型的数据。这里我们使用 huggingface dataset 作为数据集的统一接口，避免引入额外的逻辑。具体示例如下：
+    ```python
+    import datasets
+    from .base import BaseDataset
+    class MyDataset(BaseDataset):
+        @staticmethod
+        def load(**kwargs) -> datasets.Dataset:
+            pass
+    ```
+    - （可选）如果OpenCompass已有的evaluator不能满足需要，需要用户定义 `MyDatasetlEvaluator` 类，实现评分方法 `score` ，需要根据输入的 `predictions` 和 `references` 列表，得到需要的字典。由于一个数据集可能存在多种metric，需要返回一个 metrics 以及对应 scores 的相关字典。具体示例如下：
+    ```python
+    from opencompass.openicl.icl_evaluator import BaseEvaluator
+    class MyDatasetlEvaluator(BaseEvaluator):
+        def score(self, predictions: List, references: List) -> dict:
+            pass
+    ```
+    - （可选）如果 OpenCompass 已有的 postprocessor 不能满足需要，需要用户定义 `mydataset_postprocess` 方法，根据输入的字符串得到相应后处理的结果。具体示例如下：
+    ```python
+    def mydataset_postprocess(text: str) -> str:
+        pass
+    ```
+2. 在定义好数据集加载，数据后处理以及 `evaluator` 等方法之后，需要在配置文件中新增以下配置：
+    ```python
+    from opencompass.datasets import MyDataset, MyDatasetlEvaluator, mydataset_postprocess
+    mydataset_eval_cfg = dict(
+        evaluator=dict(type=MyDatasetlEvaluator),
+        pred_postprocessor=dict(type=mydataset_postprocess))
+    mydataset_datasets = [
+        dict(
+            type=MyDataset,
+            ...,
+            reader_cfg=...,
+            infer_cfg=...,
+            eval_cfg=mydataset_eval_cfg)
+    ]
+    ```
+    配置好数据集之后，其他需要的配置文件直接参考如何启动评测任务教程即可。
\ No newline at end of file
--- a/docs/zh_cn/advanced_guides/new_model.md
+++ b/docs/zh_cn/advanced_guides/new_model.md
+# 支持新模型
+目前我们已经支持的模型有 HF 模型、部分模型 API 、自建模型和部分第三方模型。
+## 新增API模型
+新增基于API的模型，需要在 `opencompass/models` 下新建 `mymodel_api.py` 文件，继承 `BaseAPIModel`，并实现 `generate` 方法来进行推理，以及 `get_token_len` 方法来计算 token 的长度。在定义好之后修改对应配置文件名称即可。
+```python
+from ..base_api import BaseAPIModel
+class MyModelAPI(BaseAPIModel):
+    is_api: bool = True
+    def __init__(self,
+                 path: str,
+                 max_seq_len: int = 2048,
+                 query_per_second: int = 1,
+                 retry: int = 2,
+                 **kwargs):
+        super().__init__(path=path,
+                         max_seq_len=max_seq_len,
+                         meta_template=meta_template,
+                         query_per_second=query_per_second,
+                         retry=retry)
+        ...
+    def generate(
+        self,
+        inputs,
+        max_out_len: int = 512,
+        temperature: float = 0.7,
+    ) -> List[str]:
+        """Generate results given a list of inputs."""
+        pass
+    def get_token_len(self, prompt: str) -> int:
+        """Get lengths of the tokenized string."""
+        pass
+```
+## 新增第三方模型
+新增第三方模型，需要在 `opencompass/models` 下新建 `mymodel.py` 文件，继承 `BaseModel`，并实现 `generate` 方法来进行生成式推理， `get_ppl` 方法来进行判别式推理，以及 `get_token_len` 方法来计算 token 的长度。在定义好之后修改对应配置文件名称即可。
+```python
+from ..base import BaseModel
+class MyModel(BaseModel):
+    def __init__(self,
+                 pkg_root: str,
+                 ckpt_path: str,
+                 tokenizer_only: bool = False,
+                 meta_template: Optional[Dict] = None,
+                 **kwargs):
+        ...
+    def get_token_len(self, prompt: str) -> int:
+        """Get lengths of the tokenized strings."""
+        pass
+    def generate(self, inputs: List[str], max_out_len: int) -> List[str]:
+        """Generate results given a list of inputs. """
+        pass
+    def get_ppl(self,
+                inputs: List[str],
+                mask_length: Optional[List[int]] = None) -> List[float]:
+        """Get perplexity scores given a list of inputs."""
+        pass
+```
--- a/docs/zh_cn/conf.py
+++ b/docs/zh_cn/conf.py
+# flake8: noqa
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+# -- Path setup --------------------------------------------------------------
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import subprocess
+import sys
+import pytorch_sphinx_theme
+from sphinx.builders.html import StandaloneHTMLBuilder
+sys.path.insert(0, os.path.abspath('../../'))
+# -- Project information -----------------------------------------------------
+project = 'OpenCompass'
+copyright = '2023, OpenCompass'
+author = 'OpenCompass Authors'
+# The full version, including alpha/beta/rc tags
+# version_file = '../../opencompass/version.py'
+# def get_version():
+#     with open(version_file, 'r') as f:
+#         exec(compile(f.read(), version_file, 'exec'))
+#     return locals()['__version__']
+release = "0.5.0"
+# -- General configuration ---------------------------------------------------
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.napoleon',
+    'sphinx.ext.viewcode',
+    'myst_parser',
+    'sphinx_copybutton',
+    'sphinx_tabs.tabs',
+    'notfound.extension',
+    'sphinxcontrib.jquery',
+]
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+source_suffix = {
+    '.rst': 'restructuredtext',
+    '.md': 'markdown',
+}
+language = 'en'
+# The master toctree document.
+root_doc = 'index'
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+# -- Options for HTML output -------------------------------------------------
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'pytorch_sphinx_theme'
+html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+# yapf: disable
+html_theme_options = {
+    'menu': [
+        {
+            'name': 'GitHub',
+            'url': 'https://github.com/open-mmlab/opencompass'
+        },
+    ],
+    # Specify the language of shared menu
+    'menu_lang': 'en',
+    # Disable the default edit on GitHub
+    'default_edit_on_github': False,
+}
+# yapf: enable
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+html_css_files = [
+    'https://cdn.datatables.net/v/bs4/dt-1.12.1/datatables.min.css',
+    'css/readthedocs.css'
+]
+html_js_files = [
+    'https://cdn.datatables.net/v/bs4/dt-1.12.1/datatables.min.js',
+    'js/custom.js'
+]
+# -- Options for HTMLHelp output ---------------------------------------------
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'opencompassdoc'
+# -- Options for LaTeX output ------------------------------------------------
+latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+}
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (root_doc, 'opencompass.tex', 'OpenCompass Documentation', author, 'manual'),
+]
+# -- Options for manual page output ------------------------------------------
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [(root_doc, 'opencompass', 'OpenCompass Documentation', [author], 1)]
+# -- Options for Texinfo output ----------------------------------------------
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (root_doc, 'opencompass', 'OpenCompass Documentation', author, 'OpenCompass Authors',
+     'AGI evaluation toolbox and benchmark.', 'Miscellaneous'),
+]
+# -- Options for Epub output -------------------------------------------------
+# Bibliographic Dublin Core info.
+epub_title = project
+# The unique identifier of the text. This can be a ISBN number
+# or the project homepage.
+#
+# epub_identifier = ''
+# A unique identification for the text.
+#
+# epub_uid = ''
+# A list of files that should not be packed into the epub file.
+epub_exclude_files = ['search.html']
+# set priority when building html
+StandaloneHTMLBuilder.supported_image_types = [
+    'image/svg+xml', 'image/gif', 'image/png', 'image/jpeg'
+]
+# -- Extension configuration -------------------------------------------------
+# Ignore >>> when copying code
+copybutton_prompt_text = r'>>> |\.\.\. '
+copybutton_prompt_is_regexp = True
+# Auto-generated header anchors
+myst_heading_anchors = 3
+# Enable "colon_fence" extension of myst.
+myst_enable_extensions = ['colon_fence', 'dollarmath']
+# Configuration for intersphinx
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/3', None),
+    'numpy': ('https://numpy.org/doc/stable', None),
+    'torch': ('https://pytorch.org/docs/stable/', None),
+    'mmengine': ('https://mmengine.readthedocs.io/en/latest/', None),
+    'transformers':
+    ('https://huggingface.co/docs/transformers/main/en/', None),
+}
+napoleon_custom_sections = [
+    # Custom sections for data elements.
+    ('Meta fields', 'params_style'),
+    ('Data fields', 'params_style'),
+]
+# Disable docstring inheritance
+autodoc_inherit_docstrings = False
+# Mock some imports during generate API docs.
+autodoc_mock_imports = ['rich', 'attr', 'einops']
+# Disable displaying type annotations, these can be very verbose
+autodoc_typehints = 'none'
+# The not found page
+notfound_template = '404.html'