Ensure that you have [transformers](https://pypi.org/project/transformers/) installed.
SentenceTransformers provides a wrapper for the [OpenAI CLIP Model](https://github.com/openai/CLIP), which was trained on a variety of (image, text)-pairs.
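For illustration, here is a minimal sketch of using the wrapper to compare an image with candidate captions; the image path is a placeholder and `clip-ViT-B-32` is one available checkpoint:

```python
from PIL import Image
from sentence_transformers import SentenceTransformer, util

# Load the CLIP wrapper model
model = SentenceTransformer("clip-ViT-B-32")

# Encode an image and candidate captions into the same vector space
img_emb = model.encode(Image.open("two_dogs_in_snow.jpg"))  # placeholder image path
text_emb = model.encode([
    "Two dogs in the snow",
    "A cat on a table",
    "A picture of London at night",
])

# Cosine similarity between the image and each caption
print(util.cos_sim(img_emb, text_emb))
```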
This is an example sentence. Dies ist ein Beispielsatz.
Usually you apply this method to large corpora; for example, to find all translated sentences in the English Wikipedia and the Chinese Wikipedia.
## Margin Based Mining
We follow the setup from [Artetxe and Schwenk, Section 4.3](https://arxiv.org/pdf/1812.10464.pdf) to find translated sentences in two datasets:
1) First, we encode all sentences to their respective embeddings. As shown in [our paper](https://arxiv.org/abs/2004.09813), [LaBSE](https://huggingface.co/sentence-transformers/LaBSE) is currently the best method for bitext mining. The model is integrated in Sentence Transformers.
2) Once we have all embeddings, we find the *k* nearest neighbor sentences for all sentences in both directions. Typical choices for k are between 4 and 16.
3) Then, we score all possible sentence combinations using the formula mentioned in Section 4.3.
4) The pairs with the highest scores are most likely translated sentences. Note that the score can be larger than 1. Usually you have to find a cut-off threshold and ignore pairs below it. For high quality, a threshold of about 1.2 - 1.3 works quite well.
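Below is a minimal sketch of these four steps on a toy example. It assumes LaBSE via Sentence Transformers; with a real corpus you would use an ANN library (e.g. FAISS) for the k-nearest-neighbor search instead of a full similarity matrix:

```python
import torch
from sentence_transformers import SentenceTransformer

# Toy parallel data; in practice these are two large monolingual corpora
src_sentences = ["This is an example sentence.", "The weather is nice today."]
tgt_sentences = ["Das Wetter ist heute schön.", "Dies ist ein Beispielsatz."]

# Step 1: Encode all sentences (normalized embeddings, so dot product == cosine similarity)
model = SentenceTransformer("sentence-transformers/LaBSE")
src_emb = model.encode(src_sentences, convert_to_tensor=True, normalize_embeddings=True)
tgt_emb = model.encode(tgt_sentences, convert_to_tensor=True, normalize_embeddings=True)

# Step 2: k nearest neighbors in both directions (toy k; use 4-16 for real corpora)
k = 2
cos = src_emb @ tgt_emb.T                                # cosine similarity matrix
nn_src = torch.topk(cos, k, dim=1).values.mean(dim=1)    # mean sim of each source to its k NNs
nn_tgt = torch.topk(cos.T, k, dim=1).values.mean(dim=1)  # mean sim of each target to its k NNs

# Step 3: Margin score from Artetxe & Schwenk, Section 4.3:
# margin(x, y) = cos(x, y) / ((mean_NN(x) + mean_NN(y)) / 2)
margin = cos / ((nn_src.unsqueeze(1) + nn_tgt.unsqueeze(0)) / 2)

# Step 4: Keep the best-scoring pairs; in practice, filter by a threshold of ~1.2 - 1.3
for i, j in enumerate(margin.argmax(dim=1).tolist()):
    print(f"{src_sentences[i]} <-> {tgt_sentences[j]} (margin: {margin[i, j].item():.2f})")
```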
...
...
## Examples
- **[bucc2018.py](bucc2018.py)** - This script contains an example for the [BUCC 2018 shared task](https://comparable.limsi.fr/bucc2018/bucc2018-task.html) on finding parallel sentences. This dataset can be used to evaluate different strategies, as we know which sentences are parallel in the two corpora. The script mines for parallel sentences and then prints the optimal threshold that leads to the highest F1-score.
- **[bitext_mining.py](bitext_mining.py)** - This script reads in two text files (with a single sentence in each line) and outputs parallel sentences to *parallel-sentences-out.tsv.gz*.
- **[In-domain Data Selection for MT](https://www.clinjournal.org/clinj/article/view/137)** - This paper also employed Sentence Transformers to generate/select in-domain parallel data for machine translation systems, using monolingual texts.
Paraphrase mining is the task of finding paraphrases (texts with identical / similar meaning) in a large corpus of sentences. In [Semantic Textual Similarity](../../../docs/sentence_transformer/usage/semantic_textual_similarity.rst) we saw a simplified version of finding paraphrases in a list of sentences. The approach presented there used a brute-force approach to score and rank all pairs.
```eval_rst
However, as this has a quadratic runtime, it fails to scale to large (10,000 and more) collections of sentences. For larger collections, the :func:`~sentence_transformers.util.paraphrase_mining` function can be used::
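
    # A minimal usage sketch; "all-MiniLM-L6-v2" is one common model choice
    from sentence_transformers import SentenceTransformer
    from sentence_transformers.util import paraphrase_mining

    model = SentenceTransformer("all-MiniLM-L6-v2")

    # A single list of sentences; in practice this can contain tens of thousands
    sentences = [
        "The cat sits outside",
        "A man is playing guitar",
        "The new movie is awesome",
        "The cat plays in the garden",
        "The new movie is so great",
    ]

    paraphrases = paraphrase_mining(model, sentences)
    for score, i, j in paraphrases[0:10]:
        print(f"{sentences[i]} \t {sentences[j]} \t Score: {score:.4f}")
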
The :func:`~sentence_transformers.util.paraphrase_mining` function accepts a number of parameters that control the trade-off between speed, memory, and output size:
To optimize memory and computation time, paraphrase mining is performed in chunks, as specified by ``query_chunk_size`` and ``corpus_chunk_size``.
To be specific, only ``query_chunk_size * corpus_chunk_size`` pairs will be compared at a time, rather than ``len(sentences) * len(sentences)``. This is more time- and memory-efficient. Additionally, :func:`~sentence_transformers.util.paraphrase_mining` only considers the ``top_k`` best scores per sentence per chunk. You can experiment with this value as an efficiency-performance trade-off.
For example, with ``top_k=1``, you will get only the single most relevant other sentence for each sentence.
The final key parameter is ``max_pairs``, which determines the maximum number of paraphrase pairs that the function returns. Usually, you get fewer pairs returned because the list is cleaned of duplicates, e.g., if it contains (A, B) and (B, A), then only one is returned.
.. note::

    If B is the most similar sentence for A, A is not necessarily the most similar sentence for B. So it can happen that the returned list contains entries like (A, B) and (B, C).
```
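As a rough sketch of how these efficiency parameters fit together (the values below are illustrative, not recommendations):

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import paraphrase_mining

model = SentenceTransformer("all-MiniLM-L6-v2")  # example model choice
sentences = ["The cat sits outside", "A man is playing guitar", "The feline rests outdoors"]

paraphrases = paraphrase_mining(
    model,
    sentences,
    query_chunk_size=1000,    # search paraphrases for 1,000 sentences at a time
    corpus_chunk_size=10000,  # against chunks of 10,000 corpus sentences
    top_k=100,                # keep at most 100 candidates per sentence per chunk
    max_pairs=500000,         # return at most 500k pairs before deduplication
)
print(paraphrases)
```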
In [Semantic Search](../semantic-search/README.md) we have shown how to use SentenceTransformer to compute embeddings for queries, sentences, and paragraphs and how to use this for semantic search.
For complex search tasks, for example question answering retrieval, the search can be significantly improved by using **Retrieve & Re-Rank**.
## Retrieve & Re-Rank Pipeline
The following pipeline for Information Retrieval / Question Answering Retrieval works very well. All components are provided and explained in this article:
Given a search query, we first use a **retrieval system** that retrieves a large list of e.g. 100 possible hits which are potentially relevant for the query. For the retrieval, we can use either lexical search, e.g. with Elasticsearch, or dense retrieval with a bi-encoder. However, the retrieval system might retrieve documents that are not that relevant for the search query. Hence, in a second stage, we use a **re-ranker** based on a **cross-encoder** that scores the relevancy of all candidates for the given search query. The output will be a ranked list of hits we can present to the user.
## Retrieval: Bi-Encoder
For the retrieval of the candidate set, we can either use lexical search (e.g. [Elasticsearch](https://www.elastic.co/elasticsearch/)), or we can use a bi-encoder which is implemented in Sentence Transformers.
Lexical search looks for literal matches of the query words in your document collection. It will not recognize synonyms, acronyms or spelling variations. In contrast, semantic search (or dense retrieval) encodes the search query into vector space and retrieves the document embeddings that are close in vector space.
Semantic search overcomes the shortcomings of lexical search and can recognize synonyms and acronyms. Have a look at the [semantic search article](../semantic-search/README.md) for different options to implement semantic search.
## Re-Ranker: Cross-Encoder
The retriever has to be efficient for large document collections with millions of entries. However, it might return irrelevant candidates. A re-ranker based on a Cross-Encoder can substantially improve the final results for the user. The query and a possible document are passed simultaneously to a transformer network, which then outputs a single score between 0 and 1 indicating how relevant the document is for the given query.
The advantage of Cross-Encoders is the higher performance, as they perform attention across the query and the document. Scoring thousands or millions of (query, document)-pairs would be rather slow. Hence, we use the retriever to create a set of e.g. 100 possible candidates which are then re-ranked by the Cross-Encoder.
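For illustration, here is a minimal sketch of the two-stage pipeline on a toy corpus. The model names below are common pre-trained choices and the corpus is made up; a real application would retrieve e.g. 100 candidates instead of 2:

```python
from sentence_transformers import CrossEncoder, SentenceTransformer, util

corpus = [
    "Python is an interpreted, high-level, general-purpose programming language.",
    "Java is a class-based, object-oriented programming language.",
    "The Eiffel Tower is located in Paris, France.",
]

# Stage 1: Retrieval with a bi-encoder
bi_encoder = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")
corpus_embeddings = bi_encoder.encode(corpus, convert_to_tensor=True)

query = "What is Python?"
query_embedding = bi_encoder.encode(query, convert_to_tensor=True)
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=2)[0]

# Stage 2: Re-rank the candidates with a cross-encoder
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
cross_scores = cross_encoder.predict([(query, corpus[hit["corpus_id"]]) for hit in hits])

# Present the hits sorted by the cross-encoder score
for score, hit in sorted(zip(cross_scores, hits), key=lambda pair: pair[0], reverse=True):
    print(f"{score:.2f}\t{corpus[hit['corpus_id']]}")
```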
## Example Scripts
...
...
The bi-encoder produces embeddings independently for your paragraphs and for your search queries.
```python
# As document, we take the first two sections of the Wikipedia article about Europe
document = """Europe is a continent located entirely in the Northern Hemisphere and mostly in the Eastern Hemisphere. It comprises the westernmost part of Eurasia and is bordered by the Arctic Ocean to the north, the Atlantic Ocean to the west, the Mediterranean Sea to the south, and Asia to the east. Europe is commonly considered to be separated from Asia by the watershed of the Ural Mountains, the Ural River, the Caspian Sea, the Greater Caucasus, the Black Sea, and the waterways of the Turkish Straits. Although some of this border is over land, Europe is generally accorded the status of a full continent because of its great physical size and the weight of history and tradition."""
```
Semantic search seeks to improve search accuracy by understanding the semantic meaning of the search query and the corpus to search over. Semantic search can also perform well given synonyms, abbreviations, and misspellings, unlike keyword search engines that can only find documents based on lexical matches.
## Background
The idea behind semantic search is to embed all entries in your corpus, whether they be sentences, paragraphs, or documents, into a vector space. At search time, the query is embedded into the same vector space and the closest embeddings from your corpus are found. These entries should have a high semantic similarity with the query.
A **critical distinction** for your setup is *symmetric* vs. *asymmetric semantic search*:
- For **symmetric semantic search**, your query and the entries in your corpus are of about the same length and have the same amount of content. An example would be searching for similar questions: Your query could for example be *"How to learn Python online?"* and you want to find an entry like *"How to learn Python on the web?"*. For symmetric tasks, you could potentially flip the query and the entries in your corpus.
- Related training example: [Quora Duplicate Questions](../../training/quora_duplicate_questions/README.md).
- For **asymmetric semantic search**, you usually have a **short query** (like a question or some keywords) and you want to find a longer paragraph answering the query. An example would be a query like *"What is Python"* and you want to find the paragraph *"Python is an interpreted, high-level and general-purpose programming language. Python's design philosophy ..."*. For asymmetric tasks, flipping the query and the entries in your corpus usually does not make sense.
  - Related training example: [MS MARCO](../../training/ms_marco/README.md)
It is critical **that you choose the right model** for your type of task.
Suitable models for **symmetric semantic search**: [Pre-Trained Sentence Embedding Models](https://www.sbert.net/docs/pretrained_models.html#sentence-embedding-models)
Suitable models for **asymmetric semantic search**: [Pre-Trained MS MARCO Models](https://www.sbert.net/docs/pretrained-models/msmarco-v3.html)
## Manual Implementation
For small corpora (up to about 1 million entries), we can perform semantic search with a manual implementation by computing the embeddings for the corpus as well as for our query, and then calculating the [semantic textual similarity](../../../docs/sentence_transformer/usage/semantic_textual_similarity.rst) using [<code>SentenceTransformer.similarity</code>](../../../docs/package_reference/sentence_transformer/SentenceTransformer.html#sentence_transformers.SentenceTransformer.similarity).
For a simple example, see [semantic_search.py](semantic_search.py):
```eval_rst
.. sidebar:: Output

    .. code-block:: text

        Query: A man is eating pasta.
        Top 5 most similar sentences in corpus:
        A man is eating food. (Score: 0.7035)
        A man is eating a piece of bread. (Score: 0.5272)
        A man is riding a horse. (Score: 0.1889)
        A man is riding a white horse on an enclosed ground. (Score: 0.1047)
        A cheetah is running behind its prey. (Score: 0.0980)

        Query: Someone in a gorilla costume is playing a set of drums.
        Top 5 most similar sentences in corpus:
        A monkey is playing drums. (Score: 0.6433)
        A woman is playing violin. (Score: 0.2564)
        A man is riding a horse. (Score: 0.1389)
        A man is riding a white horse on an enclosed ground. (Score: 0.1191)
        A cheetah is running behind its prey. (Score: 0.1080)

        Query: A cheetah chases prey on across a field.
        Top 5 most similar sentences in corpus:
        A cheetah is running behind its prey. (Score: 0.8253)
        A man is eating food. (Score: 0.1399)
        A monkey is playing drums. (Score: 0.1292)
        A man is riding a white horse on an enclosed ground. (Score: 0.1097)
        A man is riding a horse. (Score: 0.0650)

.. literalinclude:: semantic_search.py
```
## Optimized Implementation
Instead of implementing semantic search by yourself, you can use the [<code>util.semantic_search</code>](../../../docs/package_reference/util.html#sentence_transformers.util.semantic_search) function.
The function accepts the following parameters:
...
...
By default, up to 100 queries are processed in parallel. Further, the corpus is chunked into sets of up to 500k entries. You can increase ``query_chunk_size`` and ``corpus_chunk_size``, which leads to increased speed for large corpora, but also increases the memory requirement.
## Speed Optimization
To get the optimal speed for the [<code>util.semantic_search</code>](../../../docs/package_reference/util.html#sentence_transformers.util.semantic_search) method, it is advisable to have the `query_embeddings` as well as the `corpus_embeddings` on the same GPU device. This significantly boosts performance. Further, we can normalize the corpus embeddings so that each corpus embedding has length 1. In that case, we can use the dot product to compute scores.
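For illustration, here is a small sketch combining both optimizations; the model name is an example choice, and on a machine with a GPU both embedding tensors end up on the same device as the model:

```python
import torch
from sentence_transformers import SentenceTransformer, util

device = "cuda" if torch.cuda.is_available() else "cpu"
model = SentenceTransformer("all-MiniLM-L6-v2", device=device)

corpus = ["A man is eating food.", "A monkey is playing drums.", "A cheetah is running behind its prey."]

# normalize_embeddings=True gives unit-length vectors, so the cheaper
# dot product can replace cosine similarity without changing the ranking
corpus_embeddings = model.encode(corpus, convert_to_tensor=True, normalize_embeddings=True)
query_embeddings = model.encode(["A man is eating pasta."], convert_to_tensor=True, normalize_embeddings=True)

# Both tensors live on the same device, and dot_score is used for scoring
hits = util.semantic_search(query_embeddings, corpus_embeddings, top_k=3, score_function=util.dot_score)
for hit in hits[0]:
    print(f"{corpus[hit['corpus_id']]} (Score: {hit['score']:.4f})")
```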
[Elasticsearch](https://www.elastic.co/elasticsearch/) can [index dense vectors](https://www.elastic.co/what-is/vector-search) and use them for document scoring. We can easily index embedding vectors, store other data alongside our vectors and, most importantly, efficiently retrieve relevant entries using [approximate nearest neighbor search](https://www.elastic.co/blog/introducing-approximate-nearest-neighbor-search-in-elasticsearch-8-0) (HNSW, see also below) on the embeddings.
...
...
For further details, see [semantic_search_quora_elasticsearch.py](semantic_search_quora_elasticsearch.py).
## Approximate Nearest Neighbor
Searching a large corpus with millions of embeddings can be time-consuming if exact nearest neighbor search is used (like it is used by [<code>util.semantic_search</code>](../../../docs/package_reference/util.html#sentence_transformers.util.semantic_search)).
In that case, Approximate Nearest Neighbor (ANN) can be helpful. Here, the data is partitioned into smaller fractions of similar embeddings. This index can be searched efficiently and the embeddings with the highest similarity (the nearest neighbors) can be retrieved within milliseconds, even if you have millions of vectors. However, the results are not necessarily exact. It is possible that some vectors with high similarity will be missed.
For all ANN methods, there are usually one or more parameters to tune that determine the recall-speed trade-off. If you want the highest speed, you have a high chance of missing hits. If you want high recall, the search speed decreases.
Three popular libraries for approximate nearest neighbor are [Annoy](https://github.com/spotify/annoy), [FAISS](https://github.com/facebookresearch/faiss), and [hnswlib](https://github.com/nmslib/hnswlib/).
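As a rough sketch, building and querying an ANN index with hnswlib might look as follows (assuming `pip install hnswlib`; the parameter values are illustrative):

```python
import hnswlib
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")
corpus = ["A man is eating food.", "A monkey is playing drums.", "A cheetah is running behind its prey."]
corpus_embeddings = model.encode(corpus, normalize_embeddings=True)

# Build the HNSW index; ef_construction and M control the recall-speed trade-off
index = hnswlib.Index(space="cosine", dim=corpus_embeddings.shape[1])
index.init_index(max_elements=len(corpus), ef_construction=200, M=16)
index.add_items(corpus_embeddings, np.arange(len(corpus)))
index.set_ef(50)  # higher ef -> better recall, slower queries

# Query the index: returns the ids and cosine distances of the k nearest neighbors
query_embedding = model.encode("A cheetah chases prey on across a field.", normalize_embeddings=True)
ids, distances = index.knn_query(query_embedding, k=2)
for idx, dist in zip(ids[0], distances[0]):
    print(f"{corpus[idx]} (cosine score: {1 - dist:.4f})")
```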
For further details, see [Retrieve & Re-rank](../retrieve_rerank/README.md).
## Examples
We list a handful of common use cases:
### Similar Questions Retrieval
[semantic_search_quora_pytorch.py](semantic_search_quora_pytorch.py) [ [Colab version](https://colab.research.google.com/drive/12cn5Oo0v3HfQQ8Tv6-ukgxXSmT3zl35A?usp=sharing) ] shows an example based on the [Quora duplicate questions](https://www.quora.com/q/quoradata/First-Quora-Dataset-Release-Question-Pairs) dataset. The user can enter a question, and the code retrieves the most similar questions from the dataset using the [<code>util.semantic_search</code>](../../../docs/package_reference/util.html#sentence_transformers.util.semantic_search) method. As model, we use [distilbert-multilingual-nli-stsb-quora-ranking](https://huggingface.co/sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking), which was trained to identify similar questions and supports 50+ languages. Hence, the user can input the question in any of the 50+ languages. This is a **symmetric search task**, as the search queries have the same length and content as the questions in the corpus.
### Similar Publication Retrieval
[semantic_search_publications.py](semantic_search_publications.py) [ [Colab version](https://colab.research.google.com/drive/12hfBveGHRsxhPIUMmJYrll2lFU4fOX06?usp=sharing) ] shows an example of how to find similar scientific publications. As corpus, we use all publications that have been presented at the EMNLP 2016 - 2018 conferences. As search query, we input the title and abstract of more recent publications and find related publications from our corpus. We use the [SPECTER](https://huggingface.co/sentence-transformers/allenai-specter) model. This is a **symmetric search task**, as the papers in the corpus consist of title & abstract and we search for title & abstract.
### Question & Answer Retrieval
[semantic_search_wikipedia_qa.py](semantic_search_wikipedia_qa.py) [ [Colab Version](https://colab.research.google.com/drive/11GunvCqJuebfeTlgbJWkIMT0xJH6PWF1?usp=sharing) ]: This example uses a model that was trained on the [Natural Questions dataset](https://huggingface.co/datasets/sentence-transformers/natural-questions). It consists of about 100k real Google search queries, together with an annotated passage from Wikipedia that provides the answer. It is an example of an **asymmetric search task**. As corpus, we use the smaller [Simple English Wikipedia](https://simple.wikipedia.org/wiki/Main_Page) so that it fits easily into memory.
[retrieve_rerank_simple_wikipedia.ipynb](../retrieve_rerank/retrieve_rerank_simple_wikipedia.ipynb) [ [Colab Version](https://colab.research.google.com/github/UKPLab/sentence-transformers/blob/master/examples/applications/retrieve_rerank/retrieve_rerank_simple_wikipedia.ipynb) ]: This script uses the [Retrieve & Re-rank](../retrieve_rerank/README.md) strategy and is an example for an **asymmetric search task**. We split all Wikipedia articles into paragraphs and encode them with a bi-encoder. If a new query / question is entered, it is encoded by the same bi-encoder and the paragraphs with the highest cosine similarity are retrieved. Next, the retrieved candidates are scored by a Cross-Encoder re-ranker and the 5 passages with the highest score from the Cross-Encoder are presented to the user. We use models that were trained on the [MS MARCO Passage Reranking](https://github.com/microsoft/MSMARCO-Passage-Ranking/) dataset, a dataset with about 500k real queries from Bing search.