Commit b0f4f53a authored by Rayyyyy's avatar Rayyyyy
Browse files

Update version according to github

parent 392df446
docs:
sphinx-build -c . -a -E .. _build
\ No newline at end of file
sphinx-build -c . -a -E .. _build
docs-quick:
sphinx-build -c . .. _build
\ No newline at end of file
......@@ -24,4 +24,88 @@ dl.class > dt {
.wy-side-nav-search {
padding-top: 0px;
}
\ No newline at end of file
}
.components {
display: flex;
flex-flow: row wrap;
}
.components > .box {
flex: 1;
margin: 0.5rem;
padding: 1rem;
border-style: solid;
border-width: 1px;
border-radius: 0.5rem;
border-color: rgb(55 65 81);
background-color: #e3e3e3;
color: #404040; /* Override the colors imposed by <a href> */
}
.components > .box:nth-child(1) > .header {
background-image: linear-gradient(to bottom right, #60a5fa, #3b82f6);
}
.components > .box:nth-child(2) > .header {
background-image: linear-gradient(to bottom right, #fb923c, #f97316);
}
.components > .box:nth-child(3) > .header {
background-image: linear-gradient(to bottom right, #f472b6, #ec4899);
}
.components > .box:nth-child(4) > .header {
background-image: linear-gradient(to bottom right, #a78bfa, #8b5cf6);
}
.components > .box:nth-child(5) > .header {
background-image: linear-gradient(to bottom right, #34d399, #10b981);
}
.components > .optional {
background: repeating-linear-gradient(
135deg,
#f1f1f1,
#f1f1f1 25px,
#e3e3e3 25px,
#e3e3e3 50px
);
}
.components > .box > .header {
border-style: solid;
border-width: 1px;
border-radius: 0.5rem;
border-color: rgb(55 65 81);
padding: 0.5rem;
text-align: center;
margin-bottom: 0.5rem;
font-weight: bold;
color: white;
}
.sidebar p {
font-size: 100% !important;
}
.training-arguments {
background-color: #f3f6f6;
border: 1px solid #e1e4e5;
}
.training-arguments > .header {
font-weight: 700;
padding: 6px 12px;
background: #e1e4e5;
}
.training-arguments > .table {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(15em, 1fr));
}
.training-arguments > .table > a {
padding: 0.5rem;
border: 1px solid #e1e4e5;
}
......@@ -8,7 +8,6 @@ from os import path
import sphinx
__version__ = "0.5.0"
__version_full__ = __version__
......
......@@ -24,9 +24,6 @@
&copy; {% trans %}Copyright{% endtrans %} {{ copyright }}
{%- endif %}
{%- endif %}
&bull; <a href="/docs/contact.html">Contact</a>
{%- if build_id and build_url %}
<span class="build">
{# Translators: Build is a noun, not a verb #}
......
......@@ -121,8 +121,12 @@
</a>
<div style="display: flex; justify-content: center;">
<div id="twitter-button">
<!-- This snippet adds a "Follow SBERT on Twitter" button. I'll remove it as Nils doesn't post about SBERT anymore -->
<!-- <div id="twitter-button">
<a href="https://twitter.com/Nils_Reimers" target="_blank" title="Follow SBERT on Twitter"><img src="/_static/Twitter_Logo_White.svg" height="20" style="margin: 0px 10px 0px -10px;"> </a>
</div> -->
<div id="hf-button">
<a href="https://huggingface.co/models?library=sentence-transformers" target="_blank" title="See all Sentence Transformer models"><img src="{{ pathto('_static/hf-logo.svg', 1) }}" style="margin: 0px 10px 0px -10px; padding: 0px; height: 28px; width: 28px;"></a>
</div>
<div id="github-button"></div>
</div>
......
......@@ -5,22 +5,23 @@
Template for the search page.
:copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
:license: BSD, see https://github.com/sphinx-doc/sphinx/blob/master/LICENSE for details.
#}
{%- extends "layout.html" %}
{% set title = _('Search') %}
{% set display_vcs_links = False %}
{%- block scripts %}
{{ super() }}
<script type="text/javascript" src="{{ pathto('_static/searchtools.js', 1) }}"></script>
<script src="{{ pathto('_static/searchtools.js', 1) }}"></script>
<script src="{{ pathto('_static/language_data.js', 1) }}"></script>
{%- endblock %}
{% block footer %}
<script type="text/javascript">
<script>
jQuery(function() { Search.loadIndex("{{ pathto('searchindex.js', 1) }}"); });
</script>
{# this is used when loading the search index using $.ajax fails,
such as on Chrome for documents on localhost #}
<script type="text/javascript" id="searchindexloader"></script>
<script id="searchindexloader"></script>
{{ super() }}
{% endblock %}
{% block body %}
......
......@@ -8,7 +8,7 @@ canonical_url =
analytics_id =
collapse_navigation = True
sticky_navigation = True
navigation_depth = 4
navigation_depth =
includehidden = True
titles_only =
logo_only =
......
......@@ -14,15 +14,19 @@
# import sys
# sys.path.insert(0, os.path.abspath('.'))
from recommonmark.transform import AutoStructify
import datetime
import importlib
import inspect
import os
from recommonmark.transform import AutoStructify
from sphinx.domains import Domain
import datetime
# -- Project information -----------------------------------------------------
project = "Sentence-Transformers"
copyright = str(datetime.datetime.now().year) + ", Nils Reimers"
author = "Nils Reimers"
project = "Sentence Transformers"
copyright = str(datetime.datetime.now().year)
author = "Nils Reimers, Tom Aarsen"
# -- General configuration ---------------------------------------------------
......@@ -30,7 +34,16 @@ author = "Nils Reimers"
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ["sphinx.ext.autodoc", "recommonmark", "sphinx_markdown_tables"]
extensions = [
"sphinx.ext.napoleon",
"sphinx.ext.autodoc",
"recommonmark",
"sphinx_markdown_tables",
"sphinx_copybutton",
"sphinx.ext.intersphinx",
"sphinx.ext.linkcode",
"sphinx_inline_tabs",
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
......@@ -38,7 +51,24 @@ templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "nr_examples"]
exclude_patterns = [
"_build",
"Thumbs.db",
".DS_Store",
"nr_examples",
"archived",
"dist",
"build",
"output",
"models",
"model_card_template.md",
]
intersphinx_mapping = {
"datasets": ("https://huggingface.co/docs/datasets/main/en/", None),
"transformers": ("https://huggingface.co/docs/transformers/main/en/", None),
"torch": ("https://pytorch.org/docs/stable/", None),
}
# -- Options for HTML output -------------------------------------------------
......@@ -49,7 +79,11 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "nr_examples"]
html_theme = "sphinx_rtd_theme"
html_theme_path = ["_themes"]
html_theme_options = {"logo_only": True, "canonical_url": "https://www.sbert.net"}
html_theme_options = {
"logo_only": True,
"canonical_url": "https://www.sbert.net",
"collapse_navigation": False,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
......@@ -78,6 +112,43 @@ html_favicon = "img/favicon.ico"
autoclass_content = "both"
# https://github.com/readthedocs/sphinx-autoapi/issues/202#issuecomment-907582382
def linkcode_resolve(domain, info):
    """Resolve a documented Python object to its GitHub source URL.

    Hook for ``sphinx.ext.linkcode``. Returns a ``master``-branch GitHub URL
    with a ``#Lstart-Lend`` fragment pointing at the object's source lines,
    or ``None`` when the object cannot be linked (non-Python domains, plain
    data attributes, or objects defined outside ``sentence_transformers``).
    """
    # Non-linkable objects from the starter kit in the tutorial.
    if domain == "js" or info["module"] == "connect4":
        return
    assert domain == "py", "expected only Python objects"

    mod = importlib.import_module(info["module"])
    # Walk the dotted path one attribute at a time. The previous
    # `objname, attrname = fullname.split(".")` raised ValueError for names
    # with more than one dot (e.g. "Class.Inner.method").
    obj = mod
    for attrname in info["fullname"].split("."):
        try:
            obj = getattr(obj, attrname)
        except AttributeError:
            # e.g. object is a plain data attribute of a class: no source.
            return None

    # Follow decorator wrappers (functools.wraps) down to the real code.
    obj = inspect.unwrap(obj)

    try:
        file = inspect.getsourcefile(obj)
        lines = inspect.getsourcelines(obj)
    except TypeError:
        # e.g. object is a typing.Union
        return None
    if file is None:
        # Built-in or otherwise sourceless object.
        return None
    file = os.path.relpath(file, os.path.abspath(".."))
    if not file.startswith("sentence_transformers"):
        # e.g. object is a typing.NewType
        return None
    # getsourcelines returns (list_of_source_lines, starting_line_number).
    start, end = lines[1], lines[1] + len(lines[0]) - 1
    return f"https://github.com/UKPLab/sentence-transformers/blob/master/{file}#L{start}-L{end}"
class GithubURLDomain(Domain):
"""
Resolve .py links to their respective Github URL
......
# Pretrained Models
We have released various pre-trained Cross Encoder models via our [Cross Encoder Hugging Face organization](https://huggingface.co/models?author=cross-encoder). Additionally, numerous community CrossEncoder models have been publicly released on the Hugging Face Hub.
Each of these models can be easily downloaded and used like so:
```python
from sentence_transformers import CrossEncoder
import torch
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", default_activation_function=torch.nn.Sigmoid())
scores = model.predict([
("How many people live in Berlin?", "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers."),
("How many people live in Berlin?", "Berlin is well known for its museums."),
])
# => array([0.9998173 , 0.01312432], dtype=float32)
```
Cross-Encoders require text pairs as inputs and output a score 0...1 (if the Sigmoid activation function is used). They do not work for individual sentences and they don't compute embeddings for individual texts.
## MS MARCO
[MS MARCO Passage Retrieval](https://github.com/microsoft/MSMARCO-Passage-Ranking) is a large dataset with real user queries from Bing search engine with annotated relevant text passages.
```eval_rst
.. note::
You can initialize these models with ``default_activation_function=torch.nn.Sigmoid()`` to force the model to return scores between 0 and 1. Otherwise, the raw value can reasonably range between -10 and 10.
```
- [cross-encoder/ms-marco-TinyBERT-L-2-v2](https://huggingface.co/cross-encoder/ms-marco-TinyBERT-L-2-v2) - MRR@10 on MS Marco Dev Set: 32.56
- [cross-encoder/ms-marco-MiniLM-L-2-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-2-v2) - MRR@10 on MS Marco Dev Set: 34.85
- [cross-encoder/ms-marco-MiniLM-L-4-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-4-v2) - MRR@10 on MS Marco Dev Set: 37.70
- [cross-encoder/ms-marco-MiniLM-L-6-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-6-v2) - MRR@10 on MS Marco Dev Set: 39.01
- [cross-encoder/ms-marco-MiniLM-L-12-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-12-v2) - MRR@10 on MS Marco Dev Set: 39.02
For details on the usage, see [Retrieve & Re-Rank](../../examples/applications/retrieve_rerank/README.md) or [MS MARCO Cross-Encoders](../pretrained-models/ce-msmarco.md).
## SQuAD (QNLI)
QNLI is based on the [SQuAD dataset](https://rajpurkar.github.io/SQuAD-explorer/) ([HF](https://huggingface.co/datasets/rajpurkar/squad)) and was introduced by the [GLUE Benchmark](https://arxiv.org/abs/1804.07461) ([HF](https://huggingface.co/datasets/nyu-mll/glue)). Given a passage from Wikipedia, annotators created questions that are answerable by that passage.
- [cross-encoder/qnli-distilroberta-base](https://huggingface.co/cross-encoder/qnli-distilroberta-base) - Accuracy on QNLI dev set: 90.96
- [cross-encoder/qnli-electra-base](https://huggingface.co/cross-encoder/qnli-electra-base) - Accuracy on QNLI dev set: 93.21
## STSbenchmark
The following models can be used like this:
```python
from sentence_transformers import CrossEncoder
model = CrossEncoder("cross-encoder/stsb-roberta-base")
scores = model.predict([("It's a wonderful day outside.", "It's so sunny today!"), ("It's a wonderful day outside.", "He drove to work earlier.")])
# => array([0.60443085, 0.00240758], dtype=float32)
```
They return a score 0...1 indicating the semantic similarity of the given sentence pair.
- [cross-encoder/stsb-TinyBERT-L-4](https://huggingface.co/cross-encoder/stsb-TinyBERT-L-4) - STSbenchmark test performance: 85.50
- [cross-encoder/stsb-distilroberta-base](https://huggingface.co/cross-encoder/stsb-distilroberta-base) - STSbenchmark test performance: 87.92
- [cross-encoder/stsb-roberta-base](https://huggingface.co/cross-encoder/stsb-roberta-base) - STSbenchmark test performance: 90.17
- [cross-encoder/stsb-roberta-large](https://huggingface.co/cross-encoder/stsb-roberta-large) - STSbenchmark test performance: 91.47
## Quora Duplicate Questions
These models have been trained on the [Quora duplicate questions dataset](https://huggingface.co/datasets/sentence-transformers/quora-duplicates). They can be used like the STSb models and give a score 0...1 indicating the probability that two questions are duplicate questions.
- [cross-encoder/quora-distilroberta-base](https://huggingface.co/cross-encoder/quora-distilroberta-base) - Average Precision dev set: 87.48
- [cross-encoder/quora-roberta-base](https://huggingface.co/cross-encoder/quora-roberta-base) - Average Precision dev set: 87.80
- [cross-encoder/quora-roberta-large](https://huggingface.co/cross-encoder/quora-roberta-large) - Average Precision dev set: 87.91
```eval_rst
.. note::
    The models don't work for question similarity. The question *How to learn Java* and *How to learn Python* will get a low score, as these questions are not duplicates. For question similarity, the respective bi-encoder trained on the Quora dataset yields much more meaningful results.
```
## NLI
Given two sentences, do they contradict each other, does one entail the other, or are they neutral? The following models were trained on the [SNLI](https://huggingface.co/datasets/stanfordnlp/snli) and [MultiNLI](https://huggingface.co/datasets/nyu-mll/multi_nli) datasets.
- [cross-encoder/nli-deberta-v3-base](https://huggingface.co/cross-encoder/nli-deberta-v3-base) - Accuracy on MNLI mismatched set: 90.04
- [cross-encoder/nli-deberta-base](https://huggingface.co/cross-encoder/nli-deberta-base) - Accuracy on MNLI mismatched set: 88.08
- [cross-encoder/nli-deberta-v3-xsmall](https://huggingface.co/cross-encoder/nli-deberta-v3-xsmall) - Accuracy on MNLI mismatched set: 87.77
- [cross-encoder/nli-deberta-v3-small](https://huggingface.co/cross-encoder/nli-deberta-v3-small) - Accuracy on MNLI mismatched set: 87.55
- [cross-encoder/nli-roberta-base](https://huggingface.co/cross-encoder/nli-roberta-base) - Accuracy on MNLI mismatched set: 87.47
- [cross-encoder/nli-MiniLM2-L6-H768](https://huggingface.co/cross-encoder/nli-MiniLM2-L6-H768) - Accuracy on MNLI mismatched set: 86.89
- [cross-encoder/nli-distilroberta-base](https://huggingface.co/cross-encoder/nli-distilroberta-base) - Accuracy on MNLI mismatched set: 83.98
```python
from sentence_transformers import CrossEncoder
model = CrossEncoder("cross-encoder/nli-deberta-v3-base")
scores = model.predict([
("A man is eating pizza", "A man eats something"),
("A black race car starts up in front of a crowd of people.", "A man is driving down a lonely road."),
])
# Convert scores to labels
label_mapping = ["contradiction", "entailment", "neutral"]
labels = [label_mapping[score_max] for score_max in scores.argmax(axis=1)]
# => ['entailment', 'contradiction']
```
## Community Models
Some notable models from the Community include:
- [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base)
- [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large)
- [BAAI/bge-reranker-v2-m3](https://huggingface.co/BAAI/bge-reranker-v2-m3)
- [BAAI/bge-reranker-v2-gemma](https://huggingface.co/BAAI/bge-reranker-v2-gemma)
- [BAAI/bge-reranker-v2-minicpm-layerwise](https://huggingface.co/BAAI/bge-reranker-v2-minicpm-layerwise)
- [jinaai/jina-reranker-v1-tiny-en](https://huggingface.co/jinaai/jina-reranker-v1-tiny-en)
- [jinaai/jina-reranker-v1-turbo-en](https://huggingface.co/jinaai/jina-reranker-v1-turbo-en)
- [mixedbread-ai/mxbai-rerank-xsmall-v1](https://huggingface.co/mixedbread-ai/mxbai-rerank-xsmall-v1)
- [mixedbread-ai/mxbai-rerank-base-v1](https://huggingface.co/mixedbread-ai/mxbai-rerank-base-v1)
- [mixedbread-ai/mxbai-rerank-large-v1](https://huggingface.co/mixedbread-ai/mxbai-rerank-large-v1)
- [maidalun1020/bce-reranker-base_v1](https://huggingface.co/maidalun1020/bce-reranker-base_v1)
\ No newline at end of file
# Training Examples
See the following examples for how to train Cross-Encoders:
- [training_stsbenchmark.py](../../../examples/training/cross-encoder/training_stsbenchmark.py) - Example how to train for Semantic Textual Similarity (STS) on the STS benchmark dataset.
- [training_quora_duplicate_questions.py](../../../examples/training/cross-encoder/training_quora_duplicate_questions.py) - Example how to train a Cross-Encoder to predict if two questions are duplicates. Uses Quora Duplicate Questions as training dataset.
- [training_nli.py](../../../examples/training/cross-encoder/training_nli.py) - Example of a multilabel classification task for Natural Language Inference (NLI).
```eval_rst
.. toctree::
:maxdepth: 1
:caption: Supervised Learning
../../../examples/training/ms_marco/cross_encoder_README
```
\ No newline at end of file
# Training Overview
```eval_rst
.. note::
The CrossEncoder training approach has not been updated in v3.0 when `training Sentence Transformer models <../sentence_transformer/training_overview.html>`_ was improved. Improving training CrossEncoders is planned for a future major update.
```
The `CrossEncoder` class is a wrapper around Huggingface `AutoModelForSequenceClassification`, but with some methods to make training and predicting scores a little bit easier. The saved models are 100% compatible with Huggingface and can also be loaded with their classes.
First, you need some sentence pair data. You can either have a continuous score, like:
```eval_rst
.. sidebar:: Documentation
- :class:`~sentence_transformers.readers.InputExample`
```
```python
from sentence_transformers import InputExample
train_samples = [
InputExample(texts=["sentence1", "sentence2"], label=0.3),
InputExample(texts=["Another", "pair"], label=0.8),
]
```
Or you have distinct classes as in the [training_nli.py](../../examples/training/cross-encoder/training_nli.py) example:
```python
from sentence_transformers import InputExample
label2int = {"contradiction": 0, "entailment": 1, "neutral": 2}
train_samples = [
InputExample(texts=["sentence1", "sentence2"], label=label2int["neutral"]),
InputExample(texts=["Another", "pair"], label=label2int["entailment"]),
]
```
Then, you define the base model and the number of labels. You can take any [Hugging Face pre-trained model](https://huggingface.co/models) that is compatible with AutoModel:
```
model = CrossEncoder('distilroberta-base', num_labels=1)
```
For binary tasks and tasks with continuous scores (like STS), we set num_labels=1. For classification tasks, we set it to the number of labels we have.
```eval_rst
We start the training by calling :meth:`CrossEncoder.fit <sentence_transformers.cross_encoder.CrossEncoder.fit>`:
.. sidebar:: Documentation
- :class:`~sentence_transformers.cross_encoder.CrossEncoder`
- :meth:`CrossEncoder.fit <sentence_transformers.cross_encoder.CrossEncoder.fit>`
::
model.fit(
train_dataloader=train_dataloader,
evaluator=evaluator,
epochs=num_epochs,
warmup_steps=warmup_steps,
output_path=model_save_path,
)
```
\ No newline at end of file
Usage
=====
Characteristics of Cross Encoder (a.k.a. reranker) models:
1. Calculates a **similarity score** given **pairs of texts**.
2. Generally provides **superior performance** compared to a Sentence Transformer (a.k.a. bi-encoder) model.
3. Often **slower** than a Sentence Transformer model, as it requires computation for each pair rather than each text.
4. Due to the previous 2 characteristics, Cross Encoders are often used to **re-rank the top-k results** from a Sentence Transformer model.
Once you have `installed <installation.md>`_ Sentence Transformers, you can easily use Cross Encoder models:
.. sidebar:: Documentation
1. :class:`~sentence_transformers.cross_encoder.CrossEncoder`
2. :meth:`CrossEncoder.predict <sentence_transformers.cross_encoder.CrossEncoder.predict>`
3. :meth:`CrossEncoder.rank <sentence_transformers.cross_encoder.CrossEncoder.rank>`
.. note::
MS Marco models return logits rather than scores between 0 and 1. Load the :class:`~sentence_transformers.cross_encoder.CrossEncoder` with ``default_activation_function=torch.nn.Sigmoid()`` to get scores between 0 and 1. This does not affect the ranking.
::
from sentence_transformers import CrossEncoder
# 1. Load a pre-trained CrossEncoder model
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
# 2. Predict scores for a pair of sentences
scores = model.predict([
("How many people live in Berlin?", "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers."),
("How many people live in Berlin?", "Berlin is well known for its museums."),
])
# => array([ 8.607138 , -4.3200774], dtype=float32)
# 3. Rank a list of passages for a query
query = "How many people live in Berlin?"
passages = [
"Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.",
"Berlin is well known for its museums.",
"In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.",
"The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.",
"The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019",
"An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.",
"Berlin is subdivided into 12 boroughs or districts (Bezirke).",
"In 2015, the total labour force in Berlin was 1.85 million.",
"In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.",
"Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.",
]
ranks = model.rank(query, passages)
# Print the scores
print("Query:", query)
for rank in ranks:
print(f"{rank['score']:.2f}\t{passages[rank['corpus_id']]}")
"""
Query: How many people live in Berlin?
8.92 The urban area of Berlin comprised about 4.1 million people in 2014, making it the seventh most populous urban area in the European Union.
8.61 Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.
8.24 An estimated 300,000-420,000 Muslims reside in Berlin, making up about 8-11 percent of the population.
7.60 In 2014, the city state Berlin had 37,368 live births (+6.6%), a record number since 1991.
6.35 In 2013 around 600,000 Berliners were registered in one of the more than 2,300 sport and fitness clubs.
5.42 Berlin has a yearly total of about 135 million day visitors, which puts it in third place among the most-visited city destinations in the European Union.
3.45 In 2015, the total labour force in Berlin was 1.85 million.
0.33 Berlin is subdivided into 12 boroughs or districts (Bezirke).
-4.24 The city of Paris had a population of 2,165,423 people within its administrative city limits as of January 1, 2019
-4.32 Berlin is well known for its museums.
"""
.. toctree::
:maxdepth: 1
:caption: Tasks
../../../examples/applications/retrieve_rerank/README
This diff is collapsed.
# Installation
We recommend **Python 3.8** or higher, **[PyTorch 1.11.0](https://pytorch.org/get-started/locally/)** or higher and **[transformers v4.32.0](https://github.com/huggingface/transformers)** or higher.
We recommend **Python 3.8+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.34.0+](https://github.com/huggingface/transformers)**. There are three options to install Sentence Transformers:
* **Default:** This allows for loading, saving, and inference (i.e., getting embeddings) of models.
* **Default and Training**: All of the above plus training.
* **Development**: All of the above plus some dependencies for developing Sentence Transformers, see [Editable Install](#editable-install).
## Install SentenceTransformers
## Install with pip
**Install with pip**
```eval_rst
.. tab:: Default
::
pip install -U sentence-transformers
.. tab:: Default and Training
::
pip install -U "sentence-transformers[train]"
To use `Weights and Biases <https://wandb.ai/>`_ to track your training logs, you should also install ``wandb`` **(recommended)**::
pip install wandb
And to track your Carbon Emissions while training and have this information automatically included in your model cards, also install ``codecarbon`` **(recommended)**::
pip install codecarbon
.. tab:: Development
::
pip install -U "sentence-transformers[dev]"
Install the *sentence-transformers* with `pip`:
```
pip install -U sentence-transformers
```
**Install with conda**
## Install with Conda
```eval_rst
.. tab:: Default
::
conda install -c conda-forge sentence-transformers
.. tab:: Default and Training
::
conda install -c conda-forge sentence-transformers accelerate datasets
To use `Weights and Biases <https://wandb.ai/>`_ to track your training logs, you should also install ``wandb`` **(recommended)**::
pip install wandb
And to track your Carbon Emissions while training and have this information automatically included in your model cards, also install ``codecarbon`` **(recommended)**::
pip install codecarbon
.. tab:: Development
::
conda install -c conda-forge sentence-transformers accelerate datasets pre-commit pytest ruff
Apple silicon Installation of *sentence-transformers*
```
conda install -c conda-forge sentence-transformers
## Install from Source
You can install ``sentence-transformers`` directly from source to take advantage of the bleeding edge `master` branch rather than the latest stable release:
```eval_rst
.. tab:: Default
::
pip install git+https://github.com/UKPLab/sentence-transformers.git
.. tab:: Default and Training
::
pip install -U "sentence-transformers[train] @ git+https://github.com/UKPLab/sentence-transformers.git"
To use `Weights and Biases <https://wandb.ai/>`_ to track your training logs, you should also install ``wandb`` **(recommended)**::
pip install wandb
And to track your carbon emissions while training and have this information automatically included in your model cards, also install ``codecarbon`` **(recommended)**::
pip install codecarbon
.. tab:: Development
::
pip install -U "sentence-transformers[dev] @ git+https://github.com/UKPLab/sentence-transformers.git"
```
**Install from source**
## Editable Install
If you want to make changes to ``sentence-transformers``, you will need an editable install. Clone the repository and install it with these commands:
```
git clone https://github.com/UKPLab/sentence-transformers
cd sentence-transformers
pip install -e ".[train,dev]"
```
Alternatively, you can also clone the latest version from the [repository](https://github.com/UKPLab/sentence-transformers) and install it directly from the source code:
````
pip install -e .
````
These commands will link the new `sentence-transformers` folder and your Python library paths, such that this folder will be used when importing `sentence-transformers`.
## Install PyTorch with CUDA support
If you want to use a GPU / CUDA, you must install PyTorch with the matching CUDA Version. Follow
[PyTorch - Get Started](https://pytorch.org/get-started/locally/) for further details how to install PyTorch.
To use a GPU/CUDA, you must install PyTorch with CUDA support. Follow [PyTorch - Get Started](https://pytorch.org/get-started/locally/) for installation steps.
\ No newline at end of file
# CrossEncoder
## CrossEncoder
For an introduction to Cross-Encoders, see [Cross-Encoders](../../examples/applications/cross-encoder/README.md).
```eval_rst
.. autoclass:: sentence_transformers.cross_encoder.CrossEncoder
:members:
```
## Training Inputs
```eval_rst
.. autoclass:: sentence_transformers.readers.InputExample
```
\ No newline at end of file
# Evaluation
CrossEncoders have their own evaluation classes, which are in `sentence_transformers.cross_encoder.evaluation`.
## CEBinaryAccuracyEvaluator
```eval_rst
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CEBinaryAccuracyEvaluator
```
## CEBinaryClassificationEvaluator
```eval_rst
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CEBinaryClassificationEvaluator
```
## CECorrelationEvaluator
```eval_rst
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CECorrelationEvaluator
```
## CEF1Evaluator
```eval_rst
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CEF1Evaluator
```
## CESoftmaxAccuracyEvaluator
```eval_rst
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CESoftmaxAccuracyEvaluator
```
## CERerankingEvaluator
```eval_rst
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CERerankingEvaluator
```
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment