We have released various pre-trained Cross Encoder models via our [Cross Encoder Hugging Face organization](https://huggingface.co/models?author=cross-encoder). Additionally, numerous community CrossEncoder models have been publicly released on the Hugging Face Hub.
Each of these models can be easily downloaded and used like so:
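For example, a minimal sketch (`model_name` is a placeholder for any of the models listed on this page):

```python
from sentence_transformers import CrossEncoder

# "model_name" is a placeholder for any CrossEncoder model on the Hub
model = CrossEncoder("model_name", max_length=512)
scores = model.predict([("Query", "Paragraph1"), ("Query", "Paragraph2")])
```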
Cross-Encoders require text pairs as input and output a score of 0...1 (if the Sigmoid activation function is used). They do not work for individual sentences and they do not compute embeddings for individual texts.
## MS MARCO
[MS MARCO Passage Retrieval](https://github.com/microsoft/MSMARCO-Passage-Ranking) is a large dataset of real user queries from the Bing search engine, annotated with relevant text passages.
```eval_rst
.. note::

    You can initialize these models with ``default_activation_function=torch.nn.Sigmoid()`` to force the model to return scores between 0 and 1. Otherwise, the raw value can reasonably range between -10 and 10.
```
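For example, a minimal sketch using one of the MS MARCO models listed below:

```python
import torch
from sentence_transformers import CrossEncoder

# Apply a Sigmoid to the raw logits so scores fall in [0, 1]
model = CrossEncoder(
    "cross-encoder/ms-marco-MiniLM-L-6-v2",
    default_activation_function=torch.nn.Sigmoid(),
)
```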
- [cross-encoder/ms-marco-TinyBERT-L-2-v2](https://huggingface.co/cross-encoder/ms-marco-TinyBERT-L-2-v2) - MRR@10 on MS Marco Dev Set: 32.56
- [cross-encoder/ms-marco-MiniLM-L-2-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-2-v2) - MRR@10 on MS Marco Dev Set: 34.85
- [cross-encoder/ms-marco-MiniLM-L-4-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-4-v2) - MRR@10 on MS Marco Dev Set: 37.70
- [cross-encoder/ms-marco-MiniLM-L-6-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-6-v2) - MRR@10 on MS Marco Dev Set: 39.01
- [cross-encoder/ms-marco-MiniLM-L-12-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-12-v2) - MRR@10 on MS Marco Dev Set: 39.02
For details on the usage, see [Retrieve & Re-Rank](../../examples/applications/retrieve_rerank/README.md) or [MS MARCO Cross-Encoders](../pretrained-models/ce-msmarco.md).
## SQuAD (QNLI)
QNLI is based on the [SQuAD dataset](https://rajpurkar.github.io/SQuAD-explorer/) ([HF](https://huggingface.co/datasets/rajpurkar/squad)) and was introduced by the [GLUE Benchmark](https://arxiv.org/abs/1804.07461) ([HF](https://huggingface.co/datasets/nyu-mll/glue)). Given a passage from Wikipedia, annotators created questions that are answerable by that passage.
- [cross-encoder/qnli-distilroberta-base](https://huggingface.co/cross-encoder/qnli-distilroberta-base) - Accuracy on QNLI dev set: 90.96
- [cross-encoder/qnli-electra-base](https://huggingface.co/cross-encoder/qnli-electra-base) - Accuracy on QNLI dev set: 93.21
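A minimal usage sketch (the example question and passages are illustrative; the score indicates how likely the passage answers the question):

```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("cross-encoder/qnli-electra-base")
scores = model.predict([
    ("What is the capital of France?", "Paris is the capital of France."),
    ("What is the capital of France?", "The Eiffel Tower is made of iron."),
])
# => scores close to 1 indicate that the passage answers the question
```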
## STSbenchmark
The following models were trained on the [STSbenchmark](https://huggingface.co/datasets/sentence-transformers/stsb) dataset. They return a score 0...1 indicating the semantic similarity of the given sentence pair.
- [cross-encoder/stsb-TinyBERT-L-4](https://huggingface.co/cross-encoder/stsb-TinyBERT-L-4) - STSbenchmark test performance: 85.50
- [cross-encoder/stsb-distilroberta-base](https://huggingface.co/cross-encoder/stsb-distilroberta-base) - STSbenchmark test performance: 87.92
- [cross-encoder/stsb-roberta-base](https://huggingface.co/cross-encoder/stsb-roberta-base) - STSbenchmark test performance: 90.17
- [cross-encoder/stsb-roberta-large](https://huggingface.co/cross-encoder/stsb-roberta-large) - STSbenchmark test performance: 91.47
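A minimal usage sketch (example sentences are illustrative):

```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("cross-encoder/stsb-roberta-base")
scores = model.predict([
    ("A man is eating pizza", "A man eats something"),
    ("A man is eating pizza", "The girl is carrying a baby"),
])
# => scores in [0, 1]; higher means more semantically similar
```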
## Quora Duplicate Questions
These models have been trained on the [Quora duplicate questions dataset](https://huggingface.co/datasets/sentence-transformers/quora-duplicates). They can be used like the STSb models and give a score 0...1 indicating the probability that the two questions are duplicates.
-[cross-encoder/quora-distilroberta-base](https://huggingface.co/cross-encoder/quora-distilroberta-base) - Average Precision dev set: 87.48
-[cross-encoder/quora-roberta-base](https://huggingface.co/cross-encoder/quora-roberta-base) - Average Precision dev set: 87.80
-[cross-encoder/quora-roberta-large](https://huggingface.co/cross-encoder/quora-roberta-large) - Average Precision dev set: 87.91
```eval_rst
.. note::

    These models do not work for question similarity. The questions *How to learn Java* and *How to learn Python* will get a low score, as they are not duplicates. For question similarity, the respective bi-encoder trained on the Quora dataset yields much more meaningful results.
```
## NLI
Given two sentences, are they contradicting each other, entailing one another, or neutral? The following models were trained on the [SNLI](https://huggingface.co/datasets/stanfordnlp/snli) and [MultiNLI](https://huggingface.co/datasets/nyu-mll/multi_nli) datasets.
- [cross-encoder/nli-deberta-v3-base](https://huggingface.co/cross-encoder/nli-deberta-v3-base) - Accuracy on MNLI mismatched set: 90.04
- [cross-encoder/nli-deberta-base](https://huggingface.co/cross-encoder/nli-deberta-base) - Accuracy on MNLI mismatched set: 88.08
- [cross-encoder/nli-deberta-v3-xsmall](https://huggingface.co/cross-encoder/nli-deberta-v3-xsmall) - Accuracy on MNLI mismatched set: 87.77
- [cross-encoder/nli-deberta-v3-small](https://huggingface.co/cross-encoder/nli-deberta-v3-small) - Accuracy on MNLI mismatched set: 87.55
- [cross-encoder/nli-roberta-base](https://huggingface.co/cross-encoder/nli-roberta-base) - Accuracy on MNLI mismatched set: 87.47
- [cross-encoder/nli-MiniLM2-L6-H768](https://huggingface.co/cross-encoder/nli-MiniLM2-L6-H768) - Accuracy on MNLI mismatched set: 86.89
- [cross-encoder/nli-distilroberta-base](https://huggingface.co/cross-encoder/nli-distilroberta-base) - Accuracy on MNLI mismatched set: 83.98
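A minimal usage sketch: these models output three scores per pair, which can be mapped to the labels contradiction, entailment, and neutral (example sentences are illustrative; see the model cards for the label order):

```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("cross-encoder/nli-deberta-v3-base")
scores = model.predict([
    ("A man is eating pizza", "A man eats something"),
    ("A black race car starts up in front of a crowd of people.", "A man is driving down a lonely road."),
])

# Convert the three-way scores into labels
label_mapping = ["contradiction", "entailment", "neutral"]
labels = [label_mapping[score_max] for score_max in scores.argmax(axis=1)]
# => ['entailment', 'contradiction']
```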
## Training Cross-Encoders
See the following examples for how to train Cross-Encoders:
- [training_stsbenchmark.py](../../../examples/training/cross-encoder/training_stsbenchmark.py) - Example of how to train for Semantic Textual Similarity (STS) on the STS benchmark dataset.
- [training_quora_duplicate_questions.py](../../../examples/training/cross-encoder/training_quora_duplicate_questions.py) - Example of how to train a Cross-Encoder to predict if two questions are duplicates. Uses the Quora Duplicate Questions dataset for training.
- [training_nli.py](../../../examples/training/cross-encoder/training_nli.py) - Example of a multi-label classification task for Natural Language Inference (NLI).
```eval_rst
.. note::

    The CrossEncoder training approach was not updated in v3.0, when `training Sentence Transformer models <../sentence_transformer/training_overview.html>`_ was improved. Improving CrossEncoder training is planned for a future major update.
```
The `CrossEncoder` class is a wrapper around Hugging Face's `AutoModelForSequenceClassification`, with some additional methods that make training and predicting scores a little easier. The saved models are 100% compatible with Hugging Face and can also be loaded with their classes.
First, you need some sentence pair data. The labels can either be continuous scores, like:
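A minimal sketch using `InputExample` (the sentences and scores are illustrative):

```python
from sentence_transformers import InputExample

train_samples = [
    InputExample(texts=["sentence1", "sentence2"], label=0.3),
    InputExample(texts=["Another", "pair"], label=0.8),
]
```

Or distinct classes, as in the [training_nli.py](../../../examples/training/cross-encoder/training_nli.py) example above, in which case `label` is an integer class index.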
Then, you define the base model and the number of labels. You can take any [Hugging Face pre-trained model](https://huggingface.co/models) that is compatible with AutoModel:
For binary tasks and tasks with continuous scores (like STS), we set `num_labels=1`. For classification tasks, we set it to the number of labels we have.
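For example (the base model name is illustrative):

```python
from sentence_transformers import CrossEncoder

# num_labels=1 for binary/continuous-score tasks;
# set it to the number of classes for classification tasks
model = CrossEncoder("distilroberta-base", num_labels=1)
```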
```eval_rst
We start the training by calling :meth:`CrossEncoder.fit <sentence_transformers.cross_encoder.CrossEncoder.fit>`:
```
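A minimal sketch, continuing from the training samples and model defined above:

```python
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_samples, shuffle=True, batch_size=16)

model.fit(
    train_dataloader=train_dataloader,
    epochs=1,
    warmup_steps=100,
)
```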
("How many people live in Berlin?","Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers."),
("How many people live in Berlin?","Berlin is well known for its museums."),
])
#=>array([8.607138,-4.3200774],dtype=float32)
#3.Rankalistofpassagesforaquery
query="How many people live in Berlin?"
passages=[
"Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.",
"Berlin is well known for its museums.",
"In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.",
"The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.",
"The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019",
"An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.",
"Berlin is subdivided into 12 boroughs or districts (Bezirke).",
"In 2015, the total labour force in Berlin was 1.85 million.",
"In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.",
"Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.",
This prints:

```
Query: How many people live in Berlin?
8.92    The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.
8.61    Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.
8.24    An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.
7.60    In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.
6.35    In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.
5.42    Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.
3.45    In 2015, the total labour force in Berlin was 1.85 million.
0.33    Berlin is subdivided into 12 boroughs or districts (Bezirke).
-4.24   The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019
```
## Installation
We recommend **Python 3.8+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.34.0+](https://github.com/huggingface/transformers)**. There are three options to install Sentence Transformers:
* **Default:** This allows for loading, saving, and inference (i.e., getting embeddings) of models.
* **Default and Training**: All of the above plus training.
* **Development**: All of the above plus some dependencies for developing Sentence Transformers, see [Editable Install](#editable-install).
## Install with pip
```eval_rst
.. tab:: Default

    ::

        pip install -U sentence-transformers

.. tab:: Default and Training

    ::

        pip install -U "sentence-transformers[train]"

    To use `Weights and Biases <https://wandb.ai/>`_ to track your training logs, you should also install ``wandb`` **(recommended)**::

        pip install wandb

    And to track your Carbon Emissions while training and have this information automatically included in your model cards, also install ``codecarbon`` **(recommended)**::

        pip install codecarbon
```
## Install from Source
You can install `sentence-transformers` directly from source to take advantage of the bleeding edge `master` branch rather than the latest stable release:
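For example, directly via pip (assuming `git` is available):

```
pip install git+https://github.com/UKPLab/sentence-transformers.git
```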
## Editable Install
Alternatively, you can clone the latest version from the [repository](https://github.com/UKPLab/sentence-transformers) and install it directly from the source code:

```
git clone https://github.com/UKPLab/sentence-transformers
cd sentence-transformers
pip install -e .
```
These commands link the cloned `sentence-transformers` folder into your Python library path, so that this folder is used when importing `sentence-transformers`.
## Install PyTorch with CUDA support
If you want to use a GPU / CUDA, you must install PyTorch with a matching CUDA version. Follow [PyTorch - Get Started](https://pytorch.org/get-started/locally/) for details on how to install PyTorch with CUDA support.
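To verify that PyTorch can see your GPU, a quick sanity check:

```python
import torch

# True if a CUDA-capable GPU is visible to PyTorch
print(torch.cuda.is_available())
```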