Commit be3dfa50 authored by jerrrrry

Initial commit
version: 2

# Set the version of Python and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.8"

formats:
  - epub

sphinx:
  configuration: docs/en/conf.py

python:
  install:
    - requirements: requirements/docs.txt
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS  ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR   = .
BUILDDIR    = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.header-logo {
  background-image: url("../image/logo.svg");
  background-size: 275px 80px;
  height: 80px;
  width: 275px;
}

@media screen and (min-width: 1100px) {
  .header-logo {
    top: -25px;
  }
}

pre {
  white-space: pre;
}

@media screen and (min-width: 2000px) {
  .pytorch-content-left {
    width: 1200px;
    margin-left: 30px;
  }
  article.pytorch-article {
    max-width: 1200px;
  }
  .pytorch-breadcrumbs-wrapper {
    width: 1200px;
  }
  .pytorch-right-menu.scrolling-fixed {
    position: fixed;
    top: 45px;
    left: 1580px;
  }
}

article.pytorch-article section code {
  padding: .2em .4em;
  background-color: #f3f4f7;
  border-radius: 5px;
}

/* Disable the change in tables */
article.pytorch-article section table code {
  padding: unset;
  background-color: unset;
  border-radius: unset;
}

table.autosummary td {
  width: 50%;
}

img.align-center {
  display: block;
  margin-left: auto;
  margin-right: auto;
}

article.pytorch-article p.rubric {
  font-weight: bold;
}
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 27.3.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="图层_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
viewBox="0 0 210 36" style="enable-background:new 0 0 210 36;" xml:space="preserve">
<style type="text/css">
.st0{fill:#5878B4;}
.st1{fill:#36569B;}
.st2{fill:#1B3882;}
</style>
<g id="_x33_">
<g>
<path class="st0" d="M16.5,22.6l-6.4,3.1l5.3-0.2L16.5,22.6z M12.3,33.6l1.1-2.9l-5.3,0.2L12.3,33.6z M21.6,33.3l6.4-3.1l-5.3,0.2
L21.6,33.3z M25.8,22.4l-1.1,2.9l5.3-0.2L25.8,22.4z M31.5,26.2l-7.1,0.2l-1.7-1.1l1.5-4L22.2,20L19,21.5l-1.5,3.9l-2.7,1.3
l-7.1,0.2l-3.2,1.5l2.1,1.4l7.1-0.2l0,0l1.7,1.1l-1.5,4L16,36l3.2-1.5l1.5-3.9l0,0l2.6-1.2l0,0l7.2-0.2l3.2-1.5L31.5,26.2z
M20.2,28.7c-1,0.5-2.3,0.5-3,0.1c-0.6-0.4-0.4-1.2,0.6-1.6c1-0.5,2.3-0.5,3-0.1C21.5,27.5,21.2,28.2,20.2,28.7z"/>
</g>
</g>
<g id="_x32_">
<g>
<path class="st1" d="M33.5,19.8l-1.3-6.5l-1.5,1.9L33.5,19.8z M27.5,5.1l-4.2-2.7L26,7L27.5,5.1z M20.7,5.7l1.3,6.5l1.5-1.9
L20.7,5.7z M26.8,20.4l4.2,2.7l-2.7-4.6L26.8,20.4z M34,22.3l-3.6-6.2l0,0l-0.5-2.7l2-2.6l-0.6-3.2l-2.1-1.4l-2,2.6l-1.7-1.1
l-3.7-6.3L19.6,0l0.6,3.2l3.7,6.3l0,0l0.5,2.6l0,0l-2,2.6l0.6,3.2l2.1,1.4l1.9-2.5l1.7,1.1l3.7,6.3l2.1,1.4L34,22.3z M27.5,14.6
c-0.6-0.4-1.3-1.6-1.5-2.6c-0.2-1,0.2-1.5,0.8-1.1c0.6,0.4,1.3,1.6,1.5,2.6C28.5,14.6,28.1,15.1,27.5,14.6z"/>
</g>
</g>
<g id="_x31_">
<g>
<path class="st2" d="M12,2.8L5.6,5.9l3.8,1.7L12,2.8z M1.1,14.4l1.3,6.5l2.6-4.8L1.1,14.4z M9.1,24l6.4-3.1l-3.8-1.7L9.1,24z
M20,12.4l-1.3-6.5l-2.6,4.8L20,12.4z M20.4,14.9l-5.1-2.3l0,0l-0.5-2.7l3.5-6.5l-0.6-3.2l-3.2,1.5L11,8.1L8.3,9.4l0,0L3.2,7.1
L0,8.6l0.6,3.2l5.2,2.3l0.5,2.7v0l-3.5,6.6l0.6,3.2l3.2-1.5l3.5-6.5l2.6-1.2l0,0l5.2,2.4l3.2-1.5L20.4,14.9z M10.9,15.2
c-1,0.5-1.9,0-2.1-1c-0.2-1,0.4-2.2,1.4-2.7c1-0.5,1.9,0,2.1,1C12.5,13.5,11.9,14.7,10.9,15.2z"/>
</g>
</g>
<path id="字" class="st2" d="M49.5,26.5c-2.5,0-4.4-0.7-5.7-2c-1.8-1.6-2.6-4-2.6-7.1c0-3.2,0.9-5.5,2.6-7.1c1.3-1.3,3.2-2,5.7-2
c2.5,0,4.4,0.7,5.7,2c1.7,1.6,2.6,4,2.6,7.1c0,3.1-0.9,5.5-2.6,7.1C53.8,25.8,51.9,26.5,49.5,26.5z M52.9,21.8
c0.8-1.1,1.3-2.6,1.3-4.5c0-1.9-0.4-3.4-1.3-4.5c-0.8-1.1-2-1.6-3.4-1.6c-1.4,0-2.6,0.5-3.4,1.6c-0.9,1.1-1.3,2.6-1.3,4.5
c0,1.9,0.4,3.4,1.3,4.5c0.9,1.1,2,1.6,3.4,1.6C50.9,23.4,52,22.9,52.9,21.8z M70.9,14.6c1,1.1,1.5,2.7,1.5,4.9c0,2.2-0.5,4-1.5,5.1
c-1,1.2-2.3,1.8-3.9,1.8c-1,0-1.9-0.3-2.5-0.8c-0.4-0.3-0.7-0.7-1.1-1.2V31h-3.3V13.2h3.2v1.9c0.4-0.6,0.7-1,1.1-1.3
c0.7-0.6,1.6-0.9,2.6-0.9C68.6,12.9,69.9,13.5,70.9,14.6z M69,19.6c0-1-0.2-1.9-0.7-2.6c-0.4-0.8-1.2-1.1-2.2-1.1
c-1.2,0-2,0.6-2.5,1.7c-0.2,0.6-0.4,1.4-0.4,2.3c0,1.5,0.4,2.5,1.2,3.1c0.5,0.4,1,0.5,1.7,0.5c0.9,0,1.6-0.4,2.1-1.1
C68.8,21.8,69,20.8,69,19.6z M85.8,22.2c-0.1,0.8-0.5,1.5-1.2,2.3c-1.1,1.2-2.6,1.9-4.6,1.9c-1.6,0-3.1-0.5-4.3-1.6
c-1.2-1-1.9-2.8-1.9-5.1c0-2.2,0.6-3.9,1.7-5.1c1.1-1.2,2.6-1.8,4.4-1.8c1.1,0,2,0.2,2.9,0.6c0.9,0.4,1.6,1,2.1,1.9
c0.5,0.8,0.8,1.6,1,2.6c0.1,0.6,0.1,1.4,0.1,2.5h-8.7c0,1.3,0.4,2.2,1.2,2.7c0.5,0.3,1,0.5,1.7,0.5c0.7,0,1.2-0.2,1.7-0.6
c0.2-0.2,0.4-0.5,0.6-0.9H85.8z M82.5,18.3c-0.1-0.9-0.3-1.6-0.8-2c-0.5-0.5-1.1-0.7-1.8-0.7c-0.8,0-1.4,0.2-1.8,0.7
c-0.4,0.5-0.7,1.1-0.8,2H82.5z M94.3,15.7c-1.1,0-1.9,0.5-2.3,1.4c-0.2,0.5-0.3,1.2-0.3,1.9V26h-3.3V13.2h3.2v1.9
c0.4-0.7,0.8-1.1,1.2-1.4c0.7-0.5,1.6-0.8,2.6-0.8c1.3,0,2.4,0.3,3.2,1c0.8,0.7,1.3,1.8,1.3,3.4V26h-3.4v-7.8c0-0.7-0.1-1.2-0.3-1.5
C95.8,16,95.2,15.7,94.3,15.7z M115.4,24.7c-1.3,1.2-2.9,1.8-4.9,1.8c-2.5,0-4.4-0.8-5.9-2.4c-1.4-1.6-2.1-3.8-2.1-6.6
c0-3,0.8-5.3,2.4-7c1.4-1.4,3.2-2.1,5.4-2.1c2.9,0,5,1,6.4,2.9c0.7,1.1,1.1,2.1,1.2,3.2h-3.6c-0.2-0.8-0.5-1.5-0.9-1.9
c-0.7-0.8-1.6-1.1-2.9-1.1c-1.3,0-2.3,0.5-3.1,1.6c-0.8,1.1-1.1,2.6-1.1,4.5s0.4,3.4,1.2,4.4c0.8,1,1.8,1.4,3.1,1.4
c1.3,0,2.2-0.4,2.9-1.2c0.4-0.4,0.7-1.1,0.9-2h3.6C117.5,22,116.7,23.5,115.4,24.7z M130.9,14.8c1.1,1.4,1.6,2.9,1.6,4.8
c0,1.9-0.5,3.5-1.6,4.8c-1.1,1.3-2.7,2-4.9,2c-2.2,0-3.8-0.7-4.9-2c-1.1-1.3-1.6-2.9-1.6-4.8c0-1.8,0.5-3.4,1.6-4.8
c1.1-1.4,2.7-2,4.9-2C128.2,12.8,129.9,13.5,130.9,14.8z M126,15.6c-1,0-1.7,0.3-2.3,1c-0.5,0.7-0.8,1.7-0.8,3c0,1.3,0.3,2.3,0.8,3
c0.5,0.7,1.3,1,2.3,1c1,0,1.7-0.3,2.3-1c0.5-0.7,0.8-1.7,0.8-3c0-1.3-0.3-2.3-0.8-3C127.7,16,127,15.6,126,15.6z M142.1,16.7
c-0.3-0.6-0.8-0.9-1.7-0.9c-1,0-1.6,0.3-1.9,0.9c-0.2,0.4-0.3,0.9-0.3,1.6V26h-3.4V13.2h3.2v1.9c0.4-0.7,0.8-1.1,1.2-1.4
c0.6-0.5,1.5-0.8,2.5-0.8c1,0,1.8,0.2,2.4,0.6c0.5,0.4,0.9,0.9,1.1,1.5c0.4-0.8,1-1.3,1.6-1.7c0.7-0.4,1.5-0.5,2.3-0.5
c0.6,0,1.1,0.1,1.7,0.3c0.5,0.2,1,0.6,1.5,1.1c0.4,0.4,0.6,1,0.7,1.6c0.1,0.4,0.1,1.1,0.1,1.9l0,8.1h-3.4v-8.1
c0-0.5-0.1-0.9-0.2-1.2c-0.3-0.6-0.8-0.9-1.6-0.9c-0.9,0-1.6,0.4-1.9,1.1c-0.2,0.4-0.3,0.9-0.3,1.5V26h-3.4v-7.6
C142.4,17.6,142.3,17.1,142.1,16.7z M167,14.6c1,1.1,1.5,2.7,1.5,4.9c0,2.2-0.5,4-1.5,5.1c-1,1.2-2.3,1.8-3.9,1.8
c-1,0-1.9-0.3-2.5-0.8c-0.4-0.3-0.7-0.7-1.1-1.2V31h-3.3V13.2h3.2v1.9c0.4-0.6,0.7-1,1.1-1.3c0.7-0.6,1.6-0.9,2.6-0.9
C164.7,12.9,166,13.5,167,14.6z M165.1,19.6c0-1-0.2-1.9-0.7-2.6c-0.4-0.8-1.2-1.1-2.2-1.1c-1.2,0-2,0.6-2.5,1.7
c-0.2,0.6-0.4,1.4-0.4,2.3c0,1.5,0.4,2.5,1.2,3.1c0.5,0.4,1,0.5,1.7,0.5c0.9,0,1.6-0.4,2.1-1.1C164.9,21.8,165.1,20.8,165.1,19.6z
M171.5,14.6c0.9-1.1,2.4-1.7,4.5-1.7c1.4,0,2.6,0.3,3.7,0.8c1.1,0.6,1.6,1.6,1.6,3.1v5.9c0,0.4,0,0.9,0,1.5c0,0.4,0.1,0.7,0.2,0.9
c0.1,0.2,0.3,0.3,0.5,0.4V26h-3.6c-0.1-0.3-0.2-0.5-0.2-0.7c0-0.2-0.1-0.5-0.1-0.8c-0.5,0.5-1,0.9-1.6,1.3c-0.7,0.4-1.5,0.6-2.4,0.6
c-1.2,0-2.1-0.3-2.9-1c-0.8-0.7-1.1-1.6-1.1-2.8c0-1.6,0.6-2.7,1.8-3.4c0.7-0.4,1.6-0.7,2.9-0.8l1.1-0.1c0.6-0.1,1.1-0.2,1.3-0.3
c0.5-0.2,0.7-0.5,0.7-0.9c0-0.5-0.2-0.9-0.6-1.1c-0.4-0.2-0.9-0.3-1.6-0.3c-0.8,0-1.3,0.2-1.7,0.6c-0.2,0.3-0.4,0.7-0.5,1.2h-3.2
C170.6,16.2,170.9,15.3,171.5,14.6z M173.9,23.6c0.3,0.3,0.7,0.4,1.1,0.4c0.7,0,1.4-0.2,2-0.6c0.6-0.4,0.9-1.2,0.9-2.3v-1.2
c-0.2,0.1-0.4,0.2-0.6,0.3c-0.2,0.1-0.5,0.2-0.9,0.2l-0.8,0.1c-0.7,0.1-1.2,0.3-1.5,0.5c-0.5,0.3-0.8,0.8-0.8,1.4
C173.5,22.9,173.6,23.3,173.9,23.6z M193.1,13.8c1,0.6,1.6,1.7,1.7,3.3h-3.3c0-0.4-0.2-0.8-0.4-1c-0.4-0.5-1-0.7-1.9-0.7
c-0.7,0-1.2,0.1-1.6,0.3c-0.3,0.2-0.5,0.5-0.5,0.8c0,0.4,0.2,0.7,0.5,0.8c0.3,0.2,1.5,0.5,3.5,0.9c1.3,0.3,2.3,0.8,3,1.4
c0.7,0.6,1,1.4,1,2.4c0,1.3-0.5,2.3-1.4,3.1c-0.9,0.8-2.4,1.2-4.4,1.2c-2,0-3.5-0.4-4.5-1.3c-1-0.9-1.4-1.9-1.4-3.2h3.4
c0.1,0.6,0.2,1,0.5,1.3c0.4,0.4,1.2,0.7,2.3,0.7c0.7,0,1.2-0.1,1.6-0.3c0.4-0.2,0.6-0.5,0.6-0.9c0-0.4-0.2-0.7-0.5-0.9
c-0.3-0.2-1.5-0.5-3.5-1c-1.4-0.4-2.5-0.8-3.1-1.3c-0.6-0.5-0.9-1.3-0.9-2.3c0-1.2,0.5-2.2,1.4-3c0.9-0.9,2.2-1.3,3.9-1.3
C190.8,12.9,192.1,13.2,193.1,13.8z M206.5,13.8c1,0.6,1.6,1.7,1.7,3.3h-3.3c0-0.4-0.2-0.8-0.4-1c-0.4-0.5-1-0.7-1.9-0.7
c-0.7,0-1.2,0.1-1.6,0.3c-0.3,0.2-0.5,0.5-0.5,0.8c0,0.4,0.2,0.7,0.5,0.8c0.3,0.2,1.5,0.5,3.5,0.9c1.3,0.3,2.3,0.8,3,1.4
c0.7,0.6,1,1.4,1,2.4c0,1.3-0.5,2.3-1.4,3.1c-0.9,0.8-2.4,1.2-4.4,1.2c-2,0-3.5-0.4-4.5-1.3c-1-0.9-1.4-1.9-1.4-3.2h3.4
c0.1,0.6,0.2,1,0.5,1.3c0.4,0.4,1.2,0.7,2.3,0.7c0.7,0,1.2-0.1,1.6-0.3c0.4-0.2,0.6-0.5,0.6-0.9c0-0.4-0.2-0.7-0.5-0.9
c-0.3-0.2-1.5-0.5-3.5-1c-1.4-0.4-2.5-0.8-3.1-1.3c-0.6-0.5-0.9-1.3-0.9-2.3c0-1.2,0.5-2.2,1.4-3c0.9-0.9,2.2-1.3,3.9-1.3
C204.2,12.9,205.5,13.2,206.5,13.8z"/>
</svg>
<?xml version="1.0" encoding="UTF-8"?>
<svg id="_图层_2" data-name="图层 2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 34.59 36">
<defs>
<style>
.cls-1 {
fill: #36569b;
}
.cls-2 {
fill: #1b3882;
}
.cls-3 {
fill: #5878b4;
}
</style>
</defs>
<g id="_图层_1-2" data-name="图层 1">
<g>
<g id="_3" data-name="3">
<path class="cls-3" d="m16.53,22.65l-6.37,3.07,5.27-.16,1.1-2.91Zm-4.19,10.95l1.12-2.91-5.27.17,4.15,2.74Zm9.3-.29l6.37-3.07-5.27.16-1.1,2.91Zm4.19-10.95l-1.12,2.91,5.27-.17-4.15-2.74Zm5.72,3.81l-7.08.23-1.73-1.14,1.5-3.95-2.06-1.36-3.16,1.53-1.48,3.89-2.67,1.29-7.14.23-3.16,1.53,2.07,1.36,7.13-.23h0s1.69,1.11,1.69,1.11l-1.51,3.98,2.06,1.36,3.16-1.53,1.5-3.95h0s2.56-1.24,2.56-1.24h0s7.23-.24,7.23-.24l3.16-1.53-2.06-1.36Zm-11.29,2.56c-.99.48-2.31.52-2.96.1-.65-.42-.37-1.15.62-1.63.99-.48,2.31-.52,2.96-.1.65.42.37,1.15-.62,1.63Z"/>
</g>
<g id="_2" data-name="2">
<path class="cls-1" d="m33.5,19.84l-1.26-6.51-1.46,1.88,2.72,4.63Zm-6.05-14.69l-4.16-2.74,2.71,4.64,1.45-1.89Zm-6.73.58l1.26,6.51,1.46-1.88-2.72-4.63Zm6.05,14.69l4.16,2.74-2.71-4.64-1.45,1.89Zm7.19,1.91l-3.63-6.2h0s-.53-2.74-.53-2.74l1.96-2.56-.63-3.23-2.07-1.36-1.96,2.56-1.69-1.11-3.71-6.33-2.07-1.36.63,3.23,3.68,6.28h0s.51,2.62.51,2.62h0s-1.99,2.6-1.99,2.6l.63,3.23,2.06,1.36,1.95-2.54,1.73,1.14,3.69,6.29,2.07,1.36-.63-3.23Zm-6.47-7.7c-.65-.42-1.33-1.59-1.52-2.6-.2-1.01.17-1.49.81-1.06.65.42,1.33,1.59,1.52,2.6.2,1.01-.17,1.49-.81,1.06Z"/>
</g>
<g id="_1" data-name="1">
<path class="cls-2" d="m11.96,2.82l-6.37,3.07,3.81,1.74,2.55-4.81ZM1.07,14.37l1.26,6.53,2.56-4.8-3.82-1.73Zm7.99,9.59l6.37-3.07-3.81-1.74-2.55,4.81Zm10.89-11.55l-1.26-6.53-2.56,4.8,3.82,1.73Zm.45,2.53l-5.13-2.32h0s-.53-2.71-.53-2.71l3.47-6.53-.63-3.24-3.16,1.53-3.42,6.43-2.67,1.29h0s-5.17-2.34-5.17-2.34l-3.16,1.53.63,3.24,5.17,2.33.51,2.65h0s-3.49,6.57-3.49,6.57l.63,3.24,3.16-1.53,3.46-6.52,2.56-1.24h0s5.24,2.37,5.24,2.37l3.16-1.53-.63-3.24Zm-9.52.24c-.99.48-1.95.04-2.14-.97-.2-1.01.44-2.22,1.43-2.69.99-.48,1.95-.04,2.14.97.2,1.01-.44,2.22-1.43,2.7Z"/>
</g>
</g>
</g>
</svg>
var collapsedSections = ['Dataset Statistics'];

$(document).ready(function () {
  $('.dataset').DataTable({
    "stateSave": false,
    "lengthChange": false,
    "pageLength": 20,
    "order": [],
    "language": {
      "info": "Showing _START_ to _END_ of _TOTAL_ items",
      "infoFiltered": "(filtered from _MAX_ items)",
      "search": "Search:",
      "zeroRecords": "No matching items found",
      "paginate": {
        "next": "Next",
        "previous": "Previous"
      },
    }
  });
});
{% extends "layout.html" %}
{% block body %}
<h1>Page Not Found</h1>
<p>
The page you are looking for cannot be found.
</p>
<p>
If you just switched documentation versions, it is likely that the page you were on has been moved. You can look for it in
the table of contents on the left, or go to <a href="{{ pathto(root_doc) }}">the homepage</a>.
</p>
<!-- <p>
If you cannot find documentation you want, please <a
href="">open an issue</a> to tell us!
</p> -->
{% endblock %}
.. role:: hidden
    :class: hidden-section
.. currentmodule:: {{ module }}


{{ name | underline}}

.. autoclass:: {{ name }}
    :members:

..
    autogenerated from _templates/autosummary/class.rst
    note it does not have :inherited-members:
.. role:: hidden
    :class: hidden-section
.. currentmodule:: {{ module }}


{{ name | underline}}

.. autoclass:: {{ name }}
    :members:
    :special-members: __call__

..
    autogenerated from _templates/callable.rst
    note it does not have :inherited-members:
# Accelerate Evaluation Inference with vLLM or LMDeploy
## Background
During the OpenCompass evaluation process, the Huggingface transformers library is used for inference by default. While this is a very general solution, there are scenarios where more efficient inference methods are needed to speed up the process, such as leveraging vLLM or LMDeploy.
- [LMDeploy](https://github.com/InternLM/lmdeploy) is a toolkit designed for compressing, deploying, and serving large language models (LLMs), developed by the [MMRazor](https://github.com/open-mmlab/mmrazor) and [MMDeploy](https://github.com/open-mmlab/mmdeploy) teams.
- [vLLM](https://github.com/vllm-project/vllm) is a fast and user-friendly library for LLM inference and serving, featuring advanced serving throughput, efficient PagedAttention memory management, continuous batching of requests, fast model execution via CUDA/HIP graphs, quantization techniques (e.g., GPTQ, AWQ, SqueezeLLM, FP8 KV Cache), and optimized CUDA kernels.
## Preparation for Acceleration
First, check whether the model you want to evaluate supports inference acceleration using vLLM or LMDeploy. Additionally, ensure you have installed vLLM or LMDeploy as per their official documentation. Below are the installation methods for reference:
### LMDeploy Installation Method
Install LMDeploy using pip (Python 3.8+) or from [source](https://github.com/InternLM/lmdeploy/blob/main/docs/en/build.md):
```bash
pip install lmdeploy
```
### vLLM Installation Method
Install vLLM using pip or from [source](https://vllm.readthedocs.io/en/latest/getting_started/installation.html#build-from-source):
```bash
pip install vllm
```
## Accelerated Evaluation Using vLLM or LMDeploy
### Method 1: Using Command Line Parameters to Change the Inference Backend
OpenCompass offers one-click evaluation acceleration: during evaluation, it can automatically convert Huggingface transformers models into vLLM or LMDeploy models. Below is an example config for evaluating the GSM8k dataset with the default Huggingface version of the llama3-8b-instruct model:
```python
# eval_gsm8k.py
from mmengine.config import read_base

with read_base():
    # Select a dataset list
    from .datasets.gsm8k.gsm8k_0shot_gen_a58960 import gsm8k_datasets as datasets
    # Select a model of interest
    from ..models.hf_llama.hf_llama3_8b_instruct import models
```
Here, `hf_llama3_8b_instruct` specifies the original Huggingface model configuration, as shown below:
```python
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='llama-3-8b-instruct-hf',
        path='meta-llama/Meta-Llama-3-8B-Instruct',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
        stop_words=['<|end_of_text|>', '<|eot_id|>'],
    )
]
```
To evaluate the GSM8k dataset using the default Huggingface version of the llama3-8b-instruct model, use:
```bash
python run.py config/eval_gsm8k.py
```
To accelerate the evaluation using vLLM or LMDeploy, you can use the following script:
```bash
python run.py config/eval_gsm8k.py -a vllm
```
or
```bash
python run.py config/eval_gsm8k.py -a lmdeploy
```
### Method 2: Accelerating Evaluation via Deployed Inference Acceleration Service API
OpenCompass also supports accelerating evaluation by deploying vLLM or LMDeploy inference acceleration service APIs. Follow these steps:
1. Install the openai package:
```bash
pip install openai
```
2. Deploy the inference acceleration service API for vLLM or LMDeploy. Below is an example for LMDeploy:
```bash
lmdeploy serve api_server meta-llama/Meta-Llama-3-8B-Instruct --model-name Meta-Llama-3-8B-Instruct --server-port 23333
```
Parameters for starting the api_server can be listed with `lmdeploy serve api_server -h`, for example `--tp` for tensor parallelism, `--session-len` for the maximum context window length, and `--cache-max-entry-count` for adjusting the k/v cache memory usage ratio.
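For example, a server launch using some of these options might look like the following (the parameter values are illustrative and should be adapted to your hardware):
```bash
lmdeploy serve api_server meta-llama/Meta-Llama-3-8B-Instruct \
    --model-name Meta-Llama-3-8B-Instruct \
    --server-port 23333 \
    --tp 2 \
    --session-len 8192 \
    --cache-max-entry-count 0.8
```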
3. Once the service is successfully deployed, modify the evaluation script by changing the model configuration path to the service address, as shown below:
```python
from opencompass.models import OpenAISDK

api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
    reserved_roles=[dict(role='SYSTEM', api_role='SYSTEM')],
)

models = [
    dict(
        abbr='Meta-Llama-3-8B-Instruct-LMDeploy-API',
        type=OpenAISDK,
        key='EMPTY',  # API key
        openai_api_base='http://0.0.0.0:23333/v1',  # Service address
        path='Meta-Llama-3-8B-Instruct',  # Model name for the service request
        tokenizer_path='meta-llama/Meta-Llama-3.1-8B-Instruct',  # The tokenizer name or path; if set to `None`, uses the default `gpt-4` tokenizer
        rpm_verbose=True,  # Whether to print the request rate
        meta_template=api_meta_template,  # Service request template
        query_per_second=1,  # Service request rate
        max_out_len=1024,  # Maximum output length
        max_seq_len=4096,  # Maximum input length
        temperature=0.01,  # Generation temperature
        batch_size=8,  # Batch size
        retry=3,  # Number of retries
    )
]
```
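With this API-backed model config saved in place of the Huggingface model config (for example, reusing the `config/eval_gsm8k.py` file from Method 1), the evaluation is launched the same way as before:
```bash
python run.py config/eval_gsm8k.py
```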
## Acceleration Effect and Performance Comparison
Below is a comparison table of the acceleration effect and performance when using vLLM or LMDeploy on a single A800 GPU for evaluating the Llama-3-8B-Instruct model on the GSM8k dataset:
| Inference Backend | Accuracy | Inference Time (minutes:seconds) | Speedup (relative to Huggingface) |
| ----------------- | -------- | -------------------------------- | --------------------------------- |
| Huggingface | 74.22 | 24:26 | 1.0 |
| LMDeploy | 73.69 | 11:15 | 2.2 |
| vLLM              | 72.63    | 07:52                            | 3.1                               |
# CircularEval
## Background
For multiple-choice questions, when an LLM gives the correct option, that does not necessarily mean it truly understood and reasoned about the question; it could be a guess. To distinguish these scenarios and to reduce the LLM's bias towards particular options, CircularEval can be used: a multiple-choice question is augmented by shuffling its options, and the question is considered correct under CircularEval only if the LLM answers all variations of the augmented question correctly.
## Adding Your Own CircularEval Dataset
Generally, to evaluate a dataset using CircularEval, both its loading and evaluation methods need to be rewritten. Modifications are required in both the OpenCompass main library and configuration files. We will use C-Eval as an example for explanation.
OpenCompass main library:
```python
from opencompass.datasets.ceval import CEvalDataset
from opencompass.datasets.circular import CircularDatasetMeta


class CircularCEvalDataset(CEvalDataset, metaclass=CircularDatasetMeta):
    # The overloaded dataset class
    dataset_class = CEvalDataset

    # Splits of the DatasetDict that need CircularEval. CEvalDataset loads [dev, val, test];
    # we only need CircularEval on 'val' and 'test', not on 'dev'
    default_circular_splits = ['val', 'test']

    # List of keys to be shuffled
    default_option_keys = ['A', 'B', 'C', 'D']

    # If the content of 'answer_key' is one of ['A', 'B', 'C', 'D'], it represents the correct answer.
    # This field indicates how to update the correct answer after shuffling the options.
    # Choose either this or default_answer_key_switch_method
    default_answer_key = 'answer'

    # If the content of 'answer_key' is not one of ['A', 'B', 'C', 'D'], a function can be used to
    # specify the correct answer after shuffling the options. Choose either this or default_answer_key
    # def default_answer_key_switch_method(item, circular_pattern):
    #     # 'item' is the original data item
    #     # 'circular_pattern' is a tuple indicating the order after shuffling options,
    #     # e.g., ('D', 'A', 'B', 'C') means the original option A is now D, and so on
    #     item['answer'] = circular_pattern['ABCD'.index(item['answer'])]
    #     return item
```
`CircularCEvalDataset` accepts the `circular_pattern` parameter with two values:
- `circular`: Indicates a single cycle. It is the default value. ABCD is expanded to ABCD, BCDA, CDAB, DABC, a total of 4 variations.
- `all_possible`: Indicates all permutations. ABCD is expanded to ABCD, ABDC, ACBD, ACDB, ADBC, ADCB, BACD, ..., a total of 24 variations.
Additionally, we provide a `CircularEvaluator` to replace `AccEvaluator`. This Evaluator also accepts `circular_pattern`, and it should be consistent with the above. It produces the following metrics:
- `acc_{origin|circular|all_possible}`: accuracy computed by treating each shuffled variation as a separate question.
- `perf_{origin|circular|all_possible}`: accuracy computed following the circular logic, where a question counts as correct only if all of its shuffled variations are answered correctly.
- `more_{num}_{origin|circular|all_possible}`: accuracy computed following the circular logic, where a question counts as correct if at least `num` of its shuffled variations are answered correctly.
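To make the relationship between these metrics concrete, here is a minimal, self-contained sketch (the question IDs and correctness flags are made up; this is not the OpenCompass implementation):
```python
from collections import defaultdict

# Per-variant correctness, keyed by (original question id, option order); illustrative data
results = {
    ('q1', 'ABCD'): True, ('q1', 'BCDA'): True, ('q1', 'CDAB'): True, ('q1', 'DABC'): True,
    ('q2', 'ABCD'): True, ('q2', 'BCDA'): False, ('q2', 'CDAB'): True, ('q2', 'DABC'): True,
}

# Group the per-variant flags by the original question
per_question = defaultdict(list)
for (qid, _pattern), correct in results.items():
    per_question[qid].append(correct)

n = len(per_question)
# acc_*: every shuffled variant counts as its own question
acc = sum(v for flags in per_question.values() for v in flags) / len(results)
# perf_*: a question is correct only if all of its variants are correct
perf = sum(all(flags) for flags in per_question.values()) / n
# more_3_*: a question is correct if at least 3 of its variants are correct
more_3 = sum(sum(flags) >= 3 for flags in per_question.values()) / n

print(acc, perf, more_3)  # 0.875 0.5 1.0
```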
OpenCompass configuration file:
```python
from mmengine.config import read_base
from opencompass.datasets.circular import CircularCEvalDataset, CircularEvaluator

with read_base():
    from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets

for d in ceval_datasets:
    # Overloading the load method
    d['type'] = CircularCEvalDataset
    # Renaming for differentiation from non-circular evaluation versions
    d['abbr'] = d['abbr'] + '-circular-4'
    # Overloading the evaluation method
    d['eval_cfg']['evaluator'] = {'type': CircularEvaluator}

# The dataset after the above operations looks like this:
# dict(
#     type=CircularCEvalDataset,
#     path='./data/ceval/formal_ceval',  # Unchanged
#     name='computer_network',  # Unchanged
#     abbr='ceval-computer_network-circular-4',
#     reader_cfg=dict(...),  # Unchanged
#     infer_cfg=dict(...),  # Unchanged
#     eval_cfg=dict(evaluator=dict(type=CircularEvaluator), ...),
# )
```
Additionally, for better presentation of results in CircularEval, consider using the following summarizer:
```python
from mmengine.config import read_base
from opencompass.summarizers import CircularSummarizer

with read_base():
    from ...summarizers.groups.ceval import ceval_summary_groups

new_summary_groups = []
for item in ceval_summary_groups:
    new_summary_groups.append(
        {
            'name': item['name'] + '-circular-4',
            'subsets': [i + '-circular-4' for i in item['subsets']],
        }
    )

summarizer = dict(
    type=CircularSummarizer,
    # Select specific metrics to view
    metric_types=['acc_origin', 'perf_circular'],
    dataset_abbrs=[
        'ceval-circular-4',
        'ceval-humanities-circular-4',
        'ceval-stem-circular-4',
        'ceval-social-science-circular-4',
        'ceval-other-circular-4',
    ],
    summary_groups=new_summary_groups,
)
```
For more complex evaluation examples, refer to this sample code: https://github.com/open-compass/opencompass/tree/main/configs/eval_circular.py
# Code Evaluation Tutorial
This tutorial primarily focuses on evaluating a model's coding proficiency, using `humaneval` and `mbpp` as examples.
## pass@1
If you only need to generate a single response to evaluate the pass@1 performance, you can directly use [configs/datasets/humaneval/humaneval_gen_8e312c.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/humaneval/humaneval_gen_8e312c.py) and [configs/datasets/mbpp/deprecated_mbpp_gen_1e1056.py](https://github.com/open-compass/opencompass/blob/main/configs/datasets/mbpp/deprecated_mbpp_gen_1e1056.py), referring to the general [quick start tutorial](../get_started/quick_start.md).
For multilingual evaluation, please refer to the [Multilingual Code Evaluation Tutorial](./code_eval_service.md).
## pass@k
If you need to generate multiple responses for a single example to evaluate the pass@k performance, consider the following two situations. Here we take 10 responses as an example:
### Typical Situation
For most models that support the `num_return_sequences` parameter in HF's generation, we can use it directly to obtain multiple responses. Refer to the following configuration file:
```python
from mmengine.config import read_base
from opencompass.datasets import MBPPDatasetV2, MBPPPassKEvaluator
from opencompass.models import HuggingFaceCausalLM

with read_base():
    from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
    from .datasets.mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets

mbpp_datasets[0]['type'] = MBPPDatasetV2
mbpp_datasets[0]['eval_cfg']['evaluator']['type'] = MBPPPassKEvaluator
mbpp_datasets[0]['reader_cfg']['output_column'] = 'test_column'

datasets = []
datasets += humaneval_datasets
datasets += mbpp_datasets

models = [
    dict(
        type=HuggingFaceCausalLM,
        ...,
        generation_kwargs=dict(
            num_return_sequences=10,
            do_sample=True,
            top_p=0.95,
            temperature=0.8,
        ),
        ...,
    )
]
```
For `mbpp`, changes are needed in both the dataset and the evaluation, so we modify the `type`, `eval_cfg.evaluator.type`, and `reader_cfg.output_column` fields to accommodate these requirements.
We also need the model responses to be sampled with randomness, so setting `generation_kwargs` is necessary. Note that `num_return_sequences` must be set to the desired number of responses.
Note: `num_return_sequences` must be greater than or equal to k, as pass@k itself is a probability estimate.
For details, refer to the configuration file [configs/eval_code_passk.py](https://github.com/open-compass/opencompass/blob/main/configs/eval_code_passk.py).
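For reference, the requirement that `num_return_sequences` be at least k comes from the way pass@k is estimated from n sampled completions, of which c pass the tests. A minimal sketch of the standard unbiased estimator (illustrative, not the OpenCompass implementation):
```python
from math import comb

def pass_at_k(n: int, c: int, k: int) -> float:
    """Unbiased pass@k estimator: 1 - C(n - c, k) / C(n, k)."""
    if n - c < k:  # fewer than k failing samples: any draw of k contains a correct one
        return 1.0
    return 1.0 - comb(n - c, k) / comb(n, k)

# e.g. 10 samples generated per problem, 3 of them pass the tests
print(pass_at_k(n=10, c=3, k=1))  # 0.3
```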
### For Models That Do Not Support Multiple Responses
This applies to some HF models with poorly designed APIs or missing features. In this case, we repeat the dataset to obtain multiple responses. Refer to the following configuration:
```python
from mmengine.config import read_base
from opencompass.datasets import MBPPDatasetV2, MBPPPassKEvaluator
from opencompass.models import HuggingFaceCausalLM

with read_base():
    from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
    from .datasets.mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets

humaneval_datasets[0]['abbr'] = 'openai_humaneval_pass10'
humaneval_datasets[0]['num_repeats'] = 10
mbpp_datasets[0]['abbr'] = 'mbpp_pass10'
mbpp_datasets[0]['num_repeats'] = 10
mbpp_datasets[0]['type'] = MBPPDatasetV2
mbpp_datasets[0]['eval_cfg']['evaluator']['type'] = MBPPPassKEvaluator
mbpp_datasets[0]['reader_cfg']['output_column'] = 'test_column'

datasets = []
datasets += humaneval_datasets
datasets += mbpp_datasets

models = [
    dict(
        type=HuggingFaceCausalLM,
        ...,
        generation_kwargs=dict(
            do_sample=True,
            top_p=0.95,
            temperature=0.8,
        ),
        ...,
    )
]
```
Since the dataset's prompts are unchanged, we only need to replace the corresponding fields to repeat the dataset.
You need to modify these fields:
- `num_repeats`: the number of times the dataset is repeated
- `abbr`: it is best to include the number of repetitions in the dataset abbreviation, because the dataset size will change; this prevents potential issues caused by discrepancies with the values cached in `.cache/dataset_size.json`.
For `mbpp`, the `type`, `eval_cfg.evaluator.type`, and `reader_cfg.output_column` fields also need to be modified, as in the previous configuration.
We also need the model responses to be sampled with randomness, so setting `generation_kwargs` is necessary.
For details, refer to the configuration file [configs/eval_code_passk_repeat_dataset.py](https://github.com/open-compass/opencompass/blob/main/configs/eval_code_passk_repeat_dataset.py).
# Code Evaluation Docker Tutorial
To evaluate the code capability of LLMs, we need to build a separate evaluation environment so that erroneous generated code is not executed in the development environment, where it could cause damage. The code evaluation service currently used by OpenCompass is based on the [code-evaluator](https://github.com/open-compass/code-evaluator) project; the following introduces the evaluation tutorials built around this code evaluation service.
1. humaneval-x
This is a multi-programming language dataset [humaneval-x](https://huggingface.co/datasets/THUDM/humaneval-x).
You can download the dataset from this [download link](https://github.com/THUDM/CodeGeeX2/tree/main/benchmark/humanevalx). Please download the language file (××.jsonl.gz) that needs to be evaluated and place it in the `./data/humanevalx` folder.
The currently supported languages are `python`, `cpp`, `go`, `java`, `js`.
2. DS1000
This is a Python dataset covering multiple algorithm libraries: [ds1000](https://github.com/xlang-ai/DS-1000).
You can download the dataset from this [download link](https://github.com/xlang-ai/DS-1000/blob/main/ds1000_data.zip).
The currently supported algorithm libraries are `Pandas`, `Numpy`, `Tensorflow`, `Scipy`, `Sklearn`, `Pytorch`, `Matplotlib`.
## Launching the Code Evaluation Service
1. Ensure you have installed Docker; refer to the [Docker installation document](https://docs.docker.com/engine/install/).
2. Pull the source code of the code evaluation service project and build the Docker image.
Choose the Dockerfile corresponding to the dataset you need, and replace `{your-dataset}` in the commands below with `humanevalx` or `ds1000`.
```shell
git clone https://github.com/open-compass/code-evaluator.git
docker build -t code-eval-{your-dataset}:latest -f docker/{your-dataset}/Dockerfile .
```
3. Create a container with the following commands:
```shell
# Log output format
docker run -it -p 5000:5000 code-eval-{your-dataset}:latest python server.py
# Run the program in the background
# docker run -itd -p 5000:5000 code-eval-{your-dataset}:latest python server.py
# Using different ports
# docker run -itd -p 5001:5001 code-eval-{your-dataset}:latest python server.py --port 5001
```
**Note:**
- If you encounter a timeout during the evaluation of Go, please use the following command when creating the container.
```shell
docker run -it -p 5000:5000 -e GO111MODULE=on -e GOPROXY=https://goproxy.io code-eval-{your-dataset}:latest python server.py
```
4. To ensure you have access to the service, use the following commands to check the connectivity between the inference environment and the evaluation service. (If both inference and code evaluation run on the same host, skip this step.)
```shell
ping your_service_ip_address
telnet your_service_ip_address your_service_port
```
## Local Code Evaluation
When the model inference and code evaluation services are running on the same host or within the same local area network, code inference and evaluation can be performed directly. **Note: DS1000 is currently not supported here; please use remote evaluation.**
### Configuration File
We provide [a configuration file](https://github.com/open-compass/opencompass/blob/main/configs/eval_codegeex2.py) for evaluating `codegeex2` on `humanevalx` as a reference.
The dataset and related post-processing configuration files can be found at this [link](https://github.com/open-compass/opencompass/tree/main/configs/datasets/humanevalx); pay attention to the `evaluator` field in `humanevalx_eval_cfg_dict`.
```python
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HumanevalXDataset, HumanevalXEvaluator

humanevalx_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')

humanevalx_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='{prompt}'),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=1024))

humanevalx_eval_cfg_dict = {
    lang: dict(
        evaluator=dict(
            type=HumanevalXEvaluator,
            language=lang,
            ip_address="localhost",  # replace with your code_eval_server ip_address and port
            port=5000),  # refer to https://github.com/open-compass/code-evaluator to launch a server
        pred_role='BOT')
    for lang in ['python', 'cpp', 'go', 'java', 'js']  # rust is not supported yet
}

humanevalx_datasets = [
    dict(
        type=HumanevalXDataset,
        abbr=f'humanevalx-{lang}',
        language=lang,
        path='./data/humanevalx',
        reader_cfg=humanevalx_reader_cfg,
        infer_cfg=humanevalx_infer_cfg,
        eval_cfg=humanevalx_eval_cfg_dict[lang])
    for lang in ['python', 'cpp', 'go', 'java', 'js']
]
```
### Task Launch
Refer to the [Quick Start](../get_started.html)
## Remote Code Evaluation
When the model inference and code evaluation services are located on different machines that cannot reach each other directly, model inference needs to be run first and the inference results collected before code evaluation. The configuration file and inference process from the previous tutorial can be reused.
### Collect Inference Results (Only for Humanevalx)
OpenCompass provides a script, `tools/collect_code_preds.py`, to process and collect the inference results. Pass it the configuration file used to launch the task, and specify the working directory of that task with `-r`, which works the same as the `-r` option of `run.py`. See the [documentation](https://opencompass.readthedocs.io/en/latest/get_started/quick_start.html#launching-evaluation) for more details.
```shell
python tools/collect_code_preds.py [config] [-r latest]
```
The collected results will be organized as follows under the specified working directory:
```
workdir/humanevalx
├── codegeex2-6b
│   ├── humanevalx_cpp.json
│   ├── humanevalx_go.json
│   ├── humanevalx_java.json
│   ├── humanevalx_js.json
│   └── humanevalx_python.json
├── CodeLlama-13b
│   ├── ...
├── CodeLlama-13b-Instruct
│   ├── ...
├── CodeLlama-13b-Python
│   ├── ...
├── ...
```
For DS1000, you just need to obtain the corresponding prediction file generated by `opencompass`.
### Code Evaluation
Make sure your code evaluation service is started, and use `curl` to request:
#### The following only supports Humanevalx
```shell
curl -X POST -F 'file=@{result_absolute_path}' -F 'dataset={dataset/language}' {your_service_ip_address}:{your_service_port}/evaluate
```
For example:
```shell
curl -X POST -F 'file=@./examples/humanevalx/python.json' -F 'dataset=humanevalx/python' localhost:5000/evaluate
```
Then we get:
```
"{\"pass@1\": 37.19512195121951%}"
```
Additionally, we offer an extra option named `with_prompt` (defaults to `True`), since some models (like `WizardCoder`) generate complete code without requiring the prompt to be concatenated with the prediction. You may refer to the following command for evaluation.
```shell
curl -X POST -F 'file=@./examples/humanevalx/python.json' -F 'dataset=humanevalx/python' -H 'with-prompt: False' localhost:5000/evaluate
```
#### The following only supports DS1000
Make sure the code evaluation service is started, then use `curl` to submit a request:
```shell
curl -X POST -F 'file=@./internlm-chat-7b-hf-v11/ds1000_Numpy.json' localhost:5000/evaluate
```
DS1000 supports an additional `debug` parameter. Be aware that a large amount of log output will be generated when it is turned on:
- `full`: additionally prints the original prediction, the post-processed prediction, the program that was run, and the final error for each failing sample.
- `half`: additionally prints the program that was run and the final error for each failing sample.
- `error`: additionally prints the final error for each failing sample.
```shell
curl -X POST -F 'file=@./internlm-chat-7b-hf-v11/ds1000_Numpy.json' -F 'debug=error' localhost:5000/evaluate
```
You can also modify the `num_workers` in the same way to control the degree of parallelism.
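For example, the degree of parallelism could be set by passing `num_workers` as another form field (the value `8` is illustrative):
```shell
curl -X POST -F 'file=@./internlm-chat-7b-hf-v11/ds1000_Numpy.json' -F 'num_workers=8' localhost:5000/evaluate
```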
## Advanced Tutorial
Besides evaluating the supported humanevalx dataset, users might also need the following:
### Support New Dataset
Please refer to the [tutorial on supporting new datasets](./new_dataset.md).
### Modify Post-Processing
1. For local evaluation, follow the post-processing section in the tutorial on supporting new datasets to modify the post-processing method.
2. For remote evaluation, please modify the post-processing part in the tool's `collect_code_preds.py`.
3. Some parts of post-processing could also be modified in the code evaluation service, more information will be available in the next section.
### Debugging Code Evaluation Service
When supporting new datasets or modifying post-processors, it is possible that modifications need to be made to the original code evaluation service. Please make changes based on the following steps:
1. Remove the installation of `code-evaluator` from the `Dockerfile`, and instead mount `code-evaluator` when starting the container:
```shell
docker run -it -p 5000:5000 -v /local/path/of/code-evaluator:/workspace/code-evaluator code-eval:latest bash
```
2. Install and start the code evaluation service locally. At this point, any necessary modifications can be made to the local copy of the `code-evaluator`.
```shell
cd code-evaluator && pip install -r requirements.txt
python server.py
```
# Data Contamination Assessment
**Data Contamination** refers to the phenomenon where data intended for downstream testing tasks appear in the training data of large language models (LLMs), resulting in artificially inflated performance metrics in downstream tasks (such as summarization, natural language inference, text classification), which do not accurately reflect the model's true generalization capabilities.
Since the source of data contamination lies in the training data used by LLMs, the most direct way to detect data contamination is to match the test data against the training data and report the extent of overlap between the two. The classic GPT-3 [paper](https://arxiv.org/pdf/2005.14165.pdf) reported on this in Table C.1.
However, today's open-source community often only publishes model parameters, not training datasets. In such cases, how to determine the presence and extent of data contamination remains unsolved. OpenCompass offers two possible solutions.
## Contamination Data Annotation Based on Self-Built Co-Distribution Data
Referencing the method mentioned in Section 5.2 of [Skywork](https://arxiv.org/pdf/2310.19341.pdf), we directly used the dataset [mock_gsm8k_test](https://huggingface.co/datasets/Skywork/mock_gsm8k_test) uploaded to HuggingFace by Skywork.
In this method, the authors used GPT-4 to synthesize data similar to the original GSM8K style, and then calculated the perplexity on the GSM8K training set (train), GSM8K test set (test), and GSM8K reference set (ref). Since the GSM8K reference set was newly generated, the authors considered it as clean, not belonging to any training set of any model. They posited:
- If the test set's perplexity is significantly lower than the reference set's, the test set might have appeared in the model's training phase;
- If the training set's perplexity is significantly lower than the test set's, the training set might have been overfitted by the model.
The following configuration file can be referenced:
```python
from mmengine.config import read_base

with read_base():
    from .datasets.gsm8k_contamination.gsm8k_contamination_ppl_ecdd22 import gsm8k_datasets  # includes training, test, and reference sets
    from .models.qwen.hf_qwen_7b import models as hf_qwen_7b_model  # model under review
    from .models.yi.hf_yi_6b import models as hf_yi_6b_model

datasets = [*gsm8k_datasets]
models = [*hf_qwen_7b_model, *hf_yi_6b_model]
```
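Assuming the config above is saved as `configs/eval_gsm8k_contamination.py` (the filename is illustrative), the evaluation is launched as usual:
```bash
python run.py configs/eval_gsm8k_contamination.py
```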
An example output is as follows:
```text
dataset version metric mode internlm-7b-hf qwen-7b-hf yi-6b-hf chatglm3-6b-base-hf qwen-14b-hf baichuan2-13b-base-hf internlm-20b-hf aquila2-34b-hf ...
--------------- --------- ----------- ------- ---------------- ------------ ---------- --------------------- ------------- ----------------------- ----------------- ---------------- ...
gsm8k-train-ppl 0b8e46 average_ppl unknown 1.5 0.78 1.37 1.16 0.5 0.76 1.41 0.78 ...
gsm8k-test-ppl 0b8e46 average_ppl unknown 1.56 1.33 1.42 1.3 1.15 1.13 1.52 1.16 ...
gsm8k-ref-ppl f729ba average_ppl unknown 1.55 1.2 1.43 1.35 1.27 1.19 1.47 1.35 ...
```
Currently, this solution only supports the GSM8K dataset. We welcome the community to contribute more datasets.
Consider citing the following papers if you find them helpful:
```bibtex
@misc{2023opencompass,
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
author={OpenCompass Contributors},
howpublished = {\url{https://github.com/open-compass/opencompass}},
year={2023}
}
@misc{wei2023skywork,
title={Skywork: A More Open Bilingual Foundation Model},
author={Tianwen Wei and Liang Zhao and Lichang Zhang and Bo Zhu and Lijie Wang and Haihua Yang and Biye Li and Cheng Cheng and Weiwei Lü and Rui Hu and Chenxia Li and Liu Yang and Xilin Luo and Xuejie Wu and Lunan Liu and Wenjun Cheng and Peng Cheng and Jianhao Zhang and Xiaoyu Zhang and Lei Lin and Xiaokun Wang and Yutuan Ma and Chuanhai Dong and Yanqi Sun and Yifu Chen and Yongyi Peng and Xiaojuan Liang and Shuicheng Yan and Han Fang and Yahui Zhou},
year={2023},
eprint={2310.19341},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
```
## Contamination Data Annotation Based on Classic Pre-trained Sets
Thanks to [Contamination_Detector](https://github.com/liyucheng09/Contamination_Detector) and @liyucheng09 for providing this method.
In this method, the authors search the test datasets (such as C-Eval, ARC, HellaSwag, etc.) using the Common Crawl database and Bing search engine, then mark each test sample as clean / question contaminated / both question and answer contaminated.
During testing, OpenCompass will report the accuracy or perplexity of C-Eval on the subsets composed of these three labels. Generally, accuracy increases in the order of the clean, question-contaminated, and question-and-answer-contaminated subsets. The authors believe:
- If the performance on the three subsets is relatively close, the contamination level of the model on that test set is light; otherwise, it is heavy.
The following configuration file can be referenced [link](https://github.com/open-compass/opencompass/blob/main/configs/eval_contamination.py):
```python
from mmengine.config import read_base

with read_base():
    from .datasets.ceval.ceval_clean_ppl import ceval_datasets  # ceval dataset with contamination tags
    from .models.yi.hf_yi_6b import models as hf_yi_6b_model  # model under review
    from .models.qwen.hf_qwen_7b import models as hf_qwen_7b_model
    from .summarizers.contamination import ceval_summarizer as summarizer  # output formatting

datasets = [*ceval_datasets]
models = [*hf_yi_6b_model, *hf_qwen_7b_model]
```
An example output is as follows:
```text
dataset version mode yi-6b-hf - - qwen-7b-hf - - ...
---------------------------------------------- --------- ------ ---------------- ----------------------------- --------------------------------------- ---------------- ----------------------------- --------------------------------------- ...
- - - accuracy - clean accuracy - input contaminated accuracy - input-and-label contaminated accuracy - clean accuracy - input contaminated accuracy - input-and-label contaminated ...
...
ceval-humanities - ppl 74.42 75.00 82.14 67.44 50.00 70.54 ...
ceval-stem - ppl 53.70 57.14 85.61 47.41 52.38 67.63 ...
ceval-social-science - ppl 81.60 84.62 83.09 76.00 61.54 72.79 ...
ceval-other - ppl 72.31 73.91 75.00 58.46 39.13 61.88 ...
ceval-hard - ppl 44.35 37.50 70.00 41.13 25.00 30.00 ...
ceval - ppl 67.32 71.01 81.17 58.97 49.28 67.82 ...
```
Currently, this solution only supports C-Eval, MMLU, HellaSwag, and ARC. [Contamination_Detector](https://github.com/liyucheng09/Contamination_Detector) also includes CSQA and WinoGrande, but these have not yet been implemented in OpenCompass. We welcome the community to contribute more datasets.
Consider citing the following papers if you find them helpful:
```bibtex
@misc{2023opencompass,
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
author={OpenCompass Contributors},
howpublished = {\url{https://github.com/open-compass/opencompass}},
year={2023}
}
@article{Li2023AnOS,
title={An Open Source Data Contamination Report for Llama Series Models},
author={Yucheng Li},
journal={ArXiv},
year={2023},
volume={abs/2310.17589},
url={https://api.semanticscholar.org/CorpusID:264490711}
}
```
# Custom Dataset Tutorial
This tutorial is intended for temporary and informal use of datasets. If the dataset requires long-term use or has specific needs for custom reading/inference/evaluation, it is strongly recommended to implement it according to the methods described in [new_dataset.md](./new_dataset.md).
In this tutorial, we will introduce how to test a new dataset without implementing a config or modifying the OpenCompass source code. We support two types of tasks: multiple choice (`mcq`) and question & answer (`qa`). For `mcq`, both ppl and gen inferences are supported; for `qa`, gen inference is supported.
## Dataset Format
We support datasets in both `.jsonl` and `.csv` formats.
### Multiple Choice (`mcq`)
For `mcq` datasets, the default fields are as follows:
- `question`: The stem of the multiple-choice question.
- `A`, `B`, `C`, ...: Single uppercase letters representing the options, with no limit on the number. By default, consecutive letters starting from `A` are parsed as options.
- `answer`: The correct answer to the multiple-choice question, which must be one of the options used above, such as `A`, `B`, `C`, etc.
Non-default fields will be read in but are not used by default. To use them, specify them in the `.meta.json` file.
An example of the `.jsonl` format:
```jsonl
{"question": "165+833+650+615=", "A": "2258", "B": "2263", "C": "2281", "answer": "B"}
{"question": "368+959+918+653+978=", "A": "3876", "B": "3878", "C": "3880", "answer": "A"}
{"question": "776+208+589+882+571+996+515+726=", "A": "5213", "B": "5263", "C": "5383", "answer": "B"}
{"question": "803+862+815+100+409+758+262+169=", "A": "4098", "B": "4128", "C": "4178", "answer": "C"}
```
An example of the `.csv` format:
```csv
question,A,B,C,answer
127+545+588+620+556+199=,2632,2635,2645,B
735+603+102+335+605=,2376,2380,2410,B
506+346+920+451+910+142+659+850=,4766,4774,4784,C
504+811+870+445=,2615,2630,2750,B
```
### Question & Answer (`qa`)
For `qa` datasets, the default fields are as follows:
- `question`: The stem of the question & answer question.
- `answer`: The correct answer to the question & answer question. It can be missing, indicating the dataset has no correct answer.
Non-default fields will be read in but are not used by default. To use them, specify them in the `.meta.json` file.
An example of the `.jsonl` format:
```jsonl
{"question": "752+361+181+933+235+986=", "answer": "3448"}
{"question": "712+165+223+711=", "answer": "1811"}
{"question": "921+975+888+539=", "answer": "3323"}
{"question": "752+321+388+643+568+982+468+397=", "answer": "4519"}
```
An example of the `.csv` format:
```csv
question,answer
123+147+874+850+915+163+291+604=,3967
149+646+241+898+822+386=,3142
332+424+582+962+735+798+653+214=,4700
649+215+412+495+220+738+989+452=,4170
```
## Command Line List
Custom datasets can be directly called for evaluation through the command line.
```bash
python run.py \
--models hf_llama2_7b \
--custom-dataset-path xxx/test_mcq.csv \
--custom-dataset-data-type mcq \
--custom-dataset-infer-method ppl
```
```bash
python run.py \
--models hf_llama2_7b \
--custom-dataset-path xxx/test_qa.jsonl \
--custom-dataset-data-type qa \
--custom-dataset-infer-method gen
```
In most cases, `--custom-dataset-data-type` and `--custom-dataset-infer-method` can be omitted. OpenCompass will
set them based on the following logic:
- If options like `A`, `B`, `C`, etc., can be parsed from the dataset file, it is considered an `mcq` dataset; otherwise, it is considered a `qa` dataset.
- The default `infer_method` is `gen`.
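For example, the mcq file from above can be evaluated without the two optional flags; OpenCompass will detect `mcq` from the option columns and fall back to the default `gen` inference (the dataset path is the same placeholder as before):
```bash
python run.py \
    --models hf_llama2_7b \
    --custom-dataset-path xxx/test_mcq.csv
```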
## Configuration File
In the original configuration file, simply add a new item to the `datasets` variable. Custom datasets can be mixed with regular datasets.
```python
datasets = [
{"path": "xxx/test_mcq.csv", "data_type": "mcq", "infer_method": "ppl"},
{"path": "xxx/test_qa.jsonl", "data_type": "qa", "infer_method": "gen"},
]
```
## Supplemental Information for Dataset `.meta.json`
OpenCompass will try to parse the input dataset file by default, so in most cases, the `.meta.json` file is **not necessary**. However, if the dataset field names are not the default ones, or custom prompt words are required, it should be specified in the `.meta.json` file.
The file is placed in the same directory as the dataset, with the filename followed by `.meta.json`. An example file structure is as follows:
```tree
.
├── test_mcq.csv
├── test_mcq.csv.meta.json
├── test_qa.jsonl
└── test_qa.jsonl.meta.json
```
Possible fields in this file include:
- `abbr` (str): Abbreviation of the dataset, serving as its ID.
- `data_type` (str): Type of dataset, options are `mcq` and `qa`.
- `infer_method` (str): Inference method, options are `ppl` and `gen`.
- `human_prompt` (str): User prompt template for generating prompts. Variables in the template are enclosed in `{}`, like `{question}`, `{opt1}`, etc. If `template` exists, this field will be ignored.
- `bot_prompt` (str): Bot prompt template for generating prompts. Variables in the template are enclosed in `{}`, like `{answer}`, etc. If `template` exists, this field will be ignored.
- `template` (str or dict): Question template for generating prompts. Variables in the template are enclosed in `{}`, like `{question}`, `{opt1}`, etc. The relevant syntax is in [here](../prompt/prompt_template.md) regarding `infer_cfg['prompt_template']['template']`.
- `input_columns` (list): List of input fields for reading data.
- `output_column` (str): Output field for reading data.
- `options` (list): List of options for reading data, valid only when `data_type` is `mcq`.
For example:
```json
{
"human_prompt": "Question: 127 + 545 + 588 + 620 + 556 + 199 =\nA. 2632\nB. 2635\nC. 2645\nAnswer: Let's think step by step, 127 + 545 + 588 + 620 + 556 + 199 = 672 + 588 + 620 + 556 + 199 = 1260 + 620 + 556 + 199 = 1880 + 556 + 199 = 2436 + 199 = 2635. So the answer is B.\nQuestion: {question}\nA. {A}\nB. {B}\nC. {C}\nAnswer: ",
"bot_prompt": "{answer}"
}
```
or
```json
{
    "template": "Question: {my_question}\nX. {X}\nY. {Y}\nZ. {Z}\nW. {W}\nAnswer:",
    "input_columns": ["my_question", "X", "Y", "Z", "W"],
    "output_column": "my_answer"
}
```
# Evaluation with Lightllm
We now support the evaluation of large language models using [Lightllm](https://github.com/ModelTC/lightllm) for inference. Developed by SenseTime, LightLLM is a Python-based LLM inference and serving framework, notable for its lightweight design, easy scalability, and high-speed performance. Lightllm supports a variety of large language models and allows users to deploy a model locally as a service. During the evaluation process, OpenCompass feeds data to Lightllm through its API and processes the responses. OpenCompass has been adapted for compatibility with Lightllm, and this tutorial will guide you through using OpenCompass to evaluate models with Lightllm as the inference backend.
## Setup
### Install OpenCompass
Please follow the [instructions](https://opencompass.readthedocs.io/en/latest/get_started/installation.html) to install OpenCompass and prepare the evaluation datasets.
### Install Lightllm
Please follow the [Lightllm homepage](https://github.com/ModelTC/lightllm) to install Lightllm. Pay attention to aligning the versions of the relevant dependencies, especially the version of Transformers.
## Evaluation
We use the evaluation of Humaneval with the llama2-7B model as an example.
### Step-1: Deploy the model locally as a service using Lightllm.
```shell
python -m lightllm.server.api_server --model_dir /path/llama2-7B \
--host 0.0.0.0 \
--port 1030 \
--nccl_port 2066 \
--max_req_input_len 4096 \
--max_req_total_len 6144 \
--tp 1 \
--trust_remote_code \
--max_total_token_num 120000
```
**Note:** `--tp` can be configured to enable TensorParallel inference on several GPUs, which is suitable for the inference of very large models.

**Note:** The `--max_total_token_num` in the above command will affect the throughput performance during testing. It can be configured according to the documentation on the [Lightllm homepage](https://github.com/ModelTC/lightllm). As long as it does not run out of memory, it is often better to set it as high as possible.

**Note:** If you want to start multiple LightLLM services on the same machine, you need to reconfigure the `--port` and `--nccl_port` above.
You can use the following Python script to quickly test whether the current service has been successfully started.
```python
import json

import requests

url = 'http://localhost:1030/generate'  # match the --port used when starting the server
headers = {'Content-Type': 'application/json'}
data = {
    'inputs': 'What is AI?',
    'parameters': {
        'do_sample': False,
        'ignore_eos': False,
        'max_new_tokens': 1024,
    }
}
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
    print(response.json())
else:
    print('Error:', response.status_code, response.text)
```
### Step-2: Evaluate the above model using OpenCompass.
```shell
python run.py configs/eval_lightllm.py
```
You are expected to get the evaluation results after the inference and evaluation.
**Note:** In `eval_lightllm.py`, please align the configured URL with the service address from the previous step.
# Evaluation with LMDeploy
We now support the evaluation of models accelerated by [LMDeploy](https://github.com/InternLM/lmdeploy). LMDeploy is a toolkit designed for compressing, deploying, and serving LLMs, with remarkable inference performance. Below we illustrate how to evaluate a model with the support of LMDeploy in OpenCompass.
## Setup
### Install OpenCompass
Please follow the [instructions](https://opencompass.readthedocs.io/en/latest/get_started/installation.html) to install OpenCompass and prepare the evaluation datasets.
### Install LMDeploy
Install lmdeploy via pip (python 3.8+)
```shell
pip install lmdeploy
```
The default prebuilt package is compiled on CUDA 12. However, if CUDA 11+ is required, you can install lmdeploy by:
```shell
export LMDEPLOY_VERSION=0.6.0
export PYTHON_VERSION=310
pip install https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux2014_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118
```
## Evaluation
When evaluating a model, it is necessary to prepare an evaluation configuration that specifies information such as the evaluation dataset, the model, and inference parameters.
Taking [internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) as an example, the evaluation config is as follows:
```python
# configure the dataset
from mmengine.config import read_base

with read_base():
    # choose a list of datasets
    from .datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
    from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    from .datasets.triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from opencompass.configs.datasets.gsm8k.gsm8k_0shot_v2_gen_a58960 import \
        gsm8k_datasets
    # and output the results in a chosen format
    from .summarizers.medium import summarizer

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

# configure lmdeploy
from opencompass.models import TurboMindModelwithChatTemplate

# configure the model
models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='internlm2-chat-7b-lmdeploy',
        # model path, which can be the address of a model repository on the Hugging Face Hub or a local path
        path='internlm/internlm2-chat-7b',
        # inference backend of LMDeploy. It can be either 'turbomind' or 'pytorch'.
        # If the model is not supported by 'turbomind', it will fall back to 'pytorch'
        backend='turbomind',
        # For the detailed engine config and generation config, please refer to
        # https://github.com/InternLM/lmdeploy/blob/main/lmdeploy/messages.py
        engine_config=dict(tp=1),
        gen_config=dict(do_sample=False),
        # the max size of the context window
        max_seq_len=7168,
        # the max number of new tokens
        max_out_len=1024,
        # the max number of prompts that LMDeploy receives
        # in the `generate` function
        batch_size=5000,
        run_cfg=dict(num_gpus=1),
    )
]
```
Place the aforementioned configuration in a file, such as "configs/eval_internlm2_lmdeploy.py". Then, in the home folder of OpenCompass, start evaluation by the following command:
```shell
python run.py configs/eval_internlm2_lmdeploy.py -w outputs
```
You are expected to get the evaluation results after the inference and evaluation.