from typing import List, Iterable
import collections
import string
import os
import json
from .WordTokenizer import WordTokenizer, ENGLISH_STOP_WORDS
class WhitespaceTokenizer(WordTokenizer):
"""
Simple and fast whitespace tokenizer that splits a sentence on whitespace.
Punctuation is stripped from tokens.
"""
def __init__(
self, vocab: Iterable[str] = [], stop_words: Iterable[str] = ENGLISH_STOP_WORDS, do_lower_case: bool = False
):
self.stop_words = set(stop_words)
self.do_lower_case = do_lower_case
self.set_vocab(vocab)
def get_vocab(self):
return self.vocab
def set_vocab(self, vocab: Iterable[str]):
self.vocab = vocab
self.word2idx = collections.OrderedDict([(word, idx) for idx, word in enumerate(vocab)])
def tokenize(self, text: str, **kwargs) -> List[int]:
if self.do_lower_case:
text = text.lower()
tokens = text.split()
tokens_filtered = []
for token in tokens:
if token in self.stop_words:
continue
elif token in self.word2idx:
tokens_filtered.append(self.word2idx[token])
continue
token = token.strip(string.punctuation)
if token in self.stop_words:
continue
elif len(token) > 0 and token in self.word2idx:
tokens_filtered.append(self.word2idx[token])
continue
token = token.lower()
if token in self.stop_words:
continue
elif token in self.word2idx:
tokens_filtered.append(self.word2idx[token])
continue
return tokens_filtered
def save(self, output_path: str):
with open(os.path.join(output_path, "whitespacetokenizer_config.json"), "w") as fOut:
json.dump(
{
"vocab": list(self.word2idx.keys()),
"stop_words": list(self.stop_words),
"do_lower_case": self.do_lower_case,
},
fOut,
)
@staticmethod
def load(input_path: str):
with open(os.path.join(input_path, "whitespacetokenizer_config.json"), "r") as fIn:
config = json.load(fIn)
return WhitespaceTokenizer(**config)
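# Example (illustrative sketch, not part of the original file; the two-word
# vocabulary is hypothetical): stop words are dropped and punctuation is
# stripped before the vocabulary lookup.
demo_tokenizer = WhitespaceTokenizer(vocab=["hello", "world"], do_lower_case=True)
print(demo_tokenizer.tokenize("Hello, world! The end."))  # [0, 1]; "the" is a stop word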
from abc import ABC, abstractmethod
from typing import List, Iterable
ENGLISH_STOP_WORDS = [
"!",
'"',
"''",
"``",
"#",
"$",
"%",
"&",
"'",
"(",
")",
"*",
"+",
",",
"-",
".",
"/",
":",
";",
"<",
"=",
">",
"?",
"@",
"[",
"\\",
"]",
"^",
"_",
"`",
"{",
"|",
"}",
"~",
"a",
"about",
"above",
"across",
"after",
"afterwards",
"again",
"against",
"ain",
"all",
"almost",
"alone",
"along",
"already",
"also",
"although",
"always",
"am",
"among",
"amongst",
"amoungst",
"amount",
"an",
"and",
"another",
"any",
"anyhow",
"anyone",
"anything",
"anyway",
"anywhere",
"are",
"aren",
"around",
"as",
"at",
"back",
"be",
"became",
"because",
"become",
"becomes",
"becoming",
"been",
"before",
"beforehand",
"behind",
"being",
"below",
"beside",
"besides",
"between",
"beyond",
"bill",
"both",
"bottom",
"but",
"by",
"call",
"can",
"cannot",
"cant",
"co",
"con",
"could",
"couldn",
"couldnt",
"cry",
"d",
"de",
"describe",
"detail",
"did",
"didn",
"do",
"does",
"doesn",
"doing",
"don",
"done",
"down",
"due",
"during",
"each",
"eg",
"eight",
"either",
"eleven",
"else",
"elsewhere",
"empty",
"enough",
"etc",
"even",
"ever",
"every",
"everyone",
"everything",
"everywhere",
"except",
"few",
"fifteen",
"fifty",
"fill",
"find",
"fire",
"first",
"five",
"for",
"former",
"formerly",
"forty",
"found",
"four",
"from",
"front",
"full",
"further",
"get",
"give",
"go",
"had",
"hadn",
"has",
"hasn",
"hasnt",
"have",
"haven",
"having",
"he",
"hence",
"her",
"here",
"hereafter",
"hereby",
"herein",
"hereupon",
"hers",
"herself",
"him",
"himself",
"his",
"how",
"however",
"hundred",
"i",
"ie",
"if",
"in",
"inc",
"indeed",
"interest",
"into",
"is",
"isn",
"it",
"its",
"itself",
"just",
"keep",
"last",
"latter",
"latterly",
"least",
"less",
"ll",
"ltd",
"m",
"ma",
"made",
"many",
"may",
"me",
"meanwhile",
"might",
"mightn",
"mill",
"mine",
"more",
"moreover",
"most",
"mostly",
"move",
"much",
"must",
"mustn",
"my",
"myself",
"name",
"namely",
"needn",
"neither",
"never",
"nevertheless",
"next",
"nine",
"no",
"nobody",
"none",
"noone",
"nor",
"not",
"nothing",
"now",
"nowhere",
"o",
"of",
"off",
"often",
"on",
"once",
"one",
"only",
"onto",
"or",
"other",
"others",
"otherwise",
"our",
"ours",
"ourselves",
"out",
"over",
"own",
"part",
"per",
"perhaps",
"please",
"put",
"rather",
"re",
"s",
"same",
"see",
"seem",
"seemed",
"seeming",
"seems",
"serious",
"several",
"shan",
"she",
"should",
"shouldn",
"show",
"side",
"since",
"sincere",
"six",
"sixty",
"so",
"some",
"somehow",
"someone",
"something",
"sometime",
"sometimes",
"somewhere",
"still",
"such",
"system",
"t",
"take",
"ten",
"than",
"that",
"the",
"their",
"theirs",
"them",
"themselves",
"then",
"thence",
"there",
"thereafter",
"thereby",
"therefore",
"therein",
"thereupon",
"these",
"they",
"thick",
"thin",
"third",
"this",
"those",
"though",
"three",
"through",
"throughout",
"thru",
"thus",
"to",
"together",
"too",
"top",
"toward",
"towards",
"twelve",
"twenty",
"two",
"un",
"under",
"until",
"up",
"upon",
"us",
"ve",
"very",
"via",
"was",
"wasn",
"we",
"well",
"were",
"weren",
"what",
"whatever",
"when",
"whence",
"whenever",
"where",
"whereafter",
"whereas",
"whereby",
"wherein",
"whereupon",
"wherever",
"whether",
"which",
"while",
"whither",
"who",
"whoever",
"whole",
"whom",
"whose",
"why",
"will",
"with",
"within",
"without",
"won",
"would",
"wouldn",
"y",
"yet",
"you",
"your",
"yours",
"yourself",
"yourselves",
]
class WordTokenizer(ABC):
@abstractmethod
def set_vocab(self, vocab: Iterable[str]):
pass
@abstractmethod
def get_vocab(self):
pass
@abstractmethod
def tokenize(self, text: str, **kwargs) -> List[int]:
pass
@abstractmethod
def save(self, output_path: str):
pass
@staticmethod
@abstractmethod
def load(input_path: str):
pass
from .WordTokenizer import WordTokenizer, ENGLISH_STOP_WORDS
from .WhitespaceTokenizer import WhitespaceTokenizer
from .PhraseTokenizer import PhraseTokenizer
__all__ = ["WordTokenizer", "WhitespaceTokenizer", "PhraseTokenizer", "ENGLISH_STOP_WORDS"]
import time
from torch import Tensor
from typing import List, Literal, Tuple, TYPE_CHECKING
import numpy as np
import logging
from typing import Dict, Optional, Union
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
import faiss
import usearch
def semantic_search_faiss(
query_embeddings: np.ndarray,
corpus_embeddings: Optional[np.ndarray] = None,
corpus_index: Optional["faiss.Index"] = None,
corpus_precision: Literal["float32", "uint8", "ubinary"] = "float32",
top_k: int = 10,
ranges: Optional[np.ndarray] = None,
calibration_embeddings: Optional[np.ndarray] = None,
rescore: bool = True,
rescore_multiplier: int = 2,
exact: bool = True,
output_index: bool = False,
) -> Tuple[List[List[Dict[str, Union[int, float]]]], float, "faiss.Index"]:
"""
Performs semantic search using the FAISS library.
Rescoring will be performed if:
1. `rescore` is True
2. The query embeddings are not quantized
3. The corpus is quantized, i.e. the corpus precision is not float32
Only if these conditions are met will we search for `top_k * rescore_multiplier` samples and then rescore to
keep only `top_k`.
:param query_embeddings: Embeddings of the query sentences. Ideally not quantized to allow for rescoring.
:param corpus_embeddings: Embeddings of the corpus sentences. Either `corpus_embeddings` or `corpus_index` should
be used, not both. The embeddings can be quantized to "int8" or "binary" for more efficient search.
:param corpus_index: FAISS index for the corpus sentences. Either `corpus_embeddings` or `corpus_index` should
be used, not both.
:param corpus_precision: Precision of the corpus embeddings. The options are "float32", "uint8", or "ubinary".
Default is "float32".
:param top_k: Number of top results to retrieve. Default is 10.
:param ranges: Ranges for quantization of embeddings. This is only used for int8 quantization, where the ranges
refer to the minimum and maximum values for each dimension, i.e. a 2D array with shape (2, embedding_dim).
Default is None, which means that the ranges will be calculated from the calibration embeddings.
:param calibration_embeddings: Embeddings used for calibration during quantization. This is only used for int8
quantization, where the calibration embeddings can be used to compute ranges, i.e. the minimum and maximum
values for each dimension. Default is None, which means that the ranges will be calculated from the query
embeddings. This is not recommended.
:param rescore: Whether to perform rescoring. Note that rescoring will only be performed if the query embeddings
are not quantized and the corpus is quantized, i.e. the corpus precision is not "float32". Default is True.
:param rescore_multiplier: Oversampling factor for rescoring. The code will search `top_k * rescore_multiplier` samples
and then rescore to only keep `top_k`. Default is 2.
:param exact: Whether to use exact search or approximate search. Default is True.
:param output_index: Whether to output the FAISS index used for the search. Default is False.
:return: A tuple containing a list of search results and the time taken for the search. If `output_index` is True,
the tuple will also contain the FAISS index used for the search.
:raises ValueError: If both `corpus_embeddings` and `corpus_index` are provided or if neither is provided.
The list of search results is in the format: [[{"corpus_id": int, "score": float}, ...], ...]
The time taken for the search is a float value.
"""
import faiss
if corpus_embeddings is not None and corpus_index is not None:
raise ValueError("Only corpus_embeddings or corpus_index should be used, not both.")
if corpus_embeddings is None and corpus_index is None:
raise ValueError("Either corpus_embeddings or corpus_index should be used.")
# If corpus_index is not provided, create a new index
if corpus_index is None:
if corpus_precision in ("float32", "uint8"):
if exact:
corpus_index = faiss.IndexFlatIP(corpus_embeddings.shape[1])
else:
corpus_index = faiss.IndexHNSWFlat(corpus_embeddings.shape[1], 16)
elif corpus_precision == "ubinary":
if exact:
corpus_index = faiss.IndexBinaryFlat(corpus_embeddings.shape[1] * 8)
else:
corpus_index = faiss.IndexBinaryHNSW(corpus_embeddings.shape[1] * 8, 16)
corpus_index.add(corpus_embeddings)
# If rescoring is enabled and the query embeddings are in float32, we need to quantize them
# to the same precision as the corpus embeddings. Also update the top_k value to account for the
# rescore_multiplier
rescore_embeddings = None
k = top_k
if query_embeddings.dtype not in (np.uint8, np.int8):
if rescore:
if corpus_precision != "float32":
rescore_embeddings = query_embeddings
k *= rescore_multiplier
else:
logger.warning(
"Rescoring is enabled but the corpus is not quantized. Either pass `rescore=False` or "
'quantize the corpus embeddings with `quantize_embeddings(embeddings, precision="...") `'
'and pass `corpus_precision="..."` to `semantic_search_faiss`.'
)
query_embeddings = quantize_embeddings(
query_embeddings,
precision=corpus_precision,
ranges=ranges,
calibration_embeddings=calibration_embeddings,
)
elif rescore:
logger.warning(
"Rescoring is enabled but the query embeddings are quantized. Either pass `rescore=False` or don't quantize the query embeddings."
)
# Perform the search using the FAISS index
start_t = time.time()
scores, indices = corpus_index.search(query_embeddings, k)
# If rescoring is enabled, we need to rescore the results using the rescore_embeddings
if rescore_embeddings is not None:
top_k_embeddings = np.array(
[[corpus_index.reconstruct(idx.item()) for idx in query_indices] for query_indices in indices]
)
# If the corpus precision is binary, we need to unpack the bits
if corpus_precision == "ubinary":
top_k_embeddings = np.unpackbits(top_k_embeddings, axis=-1).astype(int)
else:
top_k_embeddings = top_k_embeddings.astype(int)
# rescore_embeddings: [num_queries, embedding_dim]
# top_k_embeddings: [num_queries, top_k, embedding_dim]
# updated_scores: [num_queries, top_k]
# We use einsum to calculate the dot product between the query and the top_k embeddings, equivalent to looping
# over the queries and calculating 'rescore_embeddings[i] @ top_k_embeddings[i].T'
rescored_scores = np.einsum("ij,ikj->ik", rescore_embeddings, top_k_embeddings)
rescored_indices = np.argsort(-rescored_scores)[:, :top_k]
indices = indices[np.arange(len(query_embeddings))[:, None], rescored_indices]
scores = rescored_scores[np.arange(len(query_embeddings))[:, None], rescored_indices]
delta_t = time.time() - start_t
outputs = (
[
[
{"corpus_id": int(neighbor), "score": float(score)}
for score, neighbor in zip(scores[query_id], indices[query_id])
]
for query_id in range(len(query_embeddings))
],
delta_t,
)
if output_index:
outputs = (*outputs, corpus_index)
return outputs
def semantic_search_usearch(
query_embeddings: np.ndarray,
corpus_embeddings: Optional[np.ndarray] = None,
corpus_index: Optional["usearch.index.Index"] = None,
corpus_precision: Literal["float32", "int8", "binary"] = "float32",
top_k: int = 10,
ranges: Optional[np.ndarray] = None,
calibration_embeddings: Optional[np.ndarray] = None,
rescore: bool = True,
rescore_multiplier: int = 2,
exact: bool = True,
output_index: bool = False,
) -> Tuple[List[List[Dict[str, Union[int, float]]]], float, "usearch.index.Index"]:
"""
Performs semantic search using the usearch library.
Rescoring will be performed if:
1. `rescore` is True
2. The query embeddings are not quantized
3. The corpus is quantized, i.e. the corpus precision is not float32
Only if these conditions are met will we search for `top_k * rescore_multiplier` samples and then rescore to
keep only `top_k`.
:param query_embeddings: Embeddings of the query sentences. Ideally not quantized to allow for rescoring.
:param corpus_embeddings: Embeddings of the corpus sentences. Either `corpus_embeddings` or `corpus_index` should
be used, not both. The embeddings can be quantized to "int8" or "binary" for more efficient search.
:param corpus_index: usearch index for the corpus sentences. Either `corpus_embeddings` or `corpus_index` should
be used, not both.
:param corpus_precision: Precision of the corpus embeddings. The options are "float32", "int8", or "binary".
Default is "float32".
:param top_k: Number of top results to retrieve. Default is 10.
:param ranges: Ranges for quantization of embeddings. This is only used for int8 quantization, where the ranges
refer to the minimum and maximum values for each dimension, i.e. a 2D array with shape (2, embedding_dim).
Default is None, which means that the ranges will be calculated from the calibration embeddings.
:param calibration_embeddings: Embeddings used for calibration during quantization. This is only used for int8
quantization, where the calibration embeddings can be used to compute ranges, i.e. the minimum and maximum
values for each dimension. Default is None, which means that the ranges will be calculated from the query
embeddings. This is not recommended.
:param rescore: Whether to perform rescoring. Note that rescoring will only be performed if the query embeddings
are not quantized and the corpus is quantized, i.e. the corpus precision is not "float32". Default is True.
:param rescore_multiplier: Oversampling factor for rescoring. The code will search `top_k * rescore_multiplier` samples
and then rescore to only keep `top_k`. Default is 2.
:param exact: Whether to use exact search or approximate search. Default is True.
:param output_index: Whether to output the usearch index used for the search. Default is False.
:return: A tuple containing a list of search results and the time taken for the search. If `output_index` is True,
the tuple will also contain the usearch index used for the search.
:raises ValueError: If both `corpus_embeddings` and `corpus_index` are provided or if neither is provided.
The list of search results is in the format: [[{"corpus_id": int, "score": float}, ...], ...]
The time taken for the search is a float value.
"""
from usearch.index import Index
from usearch.compiled import ScalarKind
if corpus_embeddings is not None and corpus_index is not None:
raise ValueError("Only corpus_embeddings or corpus_index should be used, not both.")
if corpus_embeddings is None and corpus_index is None:
raise ValueError("Either corpus_embeddings or corpus_index should be used.")
if corpus_precision not in ["float32", "int8", "binary"]:
raise ValueError('corpus_precision must be "float32", "int8", or "binary" for usearch')
# If corpus_index is not provided, create a new index
if corpus_index is None:
if corpus_precision == "float32":
corpus_index = Index(
ndim=corpus_embeddings.shape[1],
metric="cos",
dtype="f32",
)
elif corpus_precision == "int8":
corpus_index = Index(
ndim=corpus_embeddings.shape[1],
metric="ip",
dtype="i8",
)
elif corpus_precision == "binary":
corpus_index = Index(
ndim=corpus_embeddings.shape[1],
metric="hamming",
dtype="i8",
)
corpus_index.add(np.arange(len(corpus_embeddings)), corpus_embeddings)
# If rescoring is enabled and the query embeddings are in float32, we need to quantize them
# to the same precision as the corpus embeddings. Also update the top_k value to account for the
# rescore_multiplier
rescore_embeddings = None
k = top_k
if query_embeddings.dtype not in (np.uint8, np.int8):
if rescore:
if corpus_index.dtype != ScalarKind.F32:
rescore_embeddings = query_embeddings
k *= rescore_multiplier
else:
logger.warning(
"Rescoring is enabled but the corpus is not quantized. Either pass `rescore=False` or "
'quantize the corpus embeddings with `quantize_embeddings(embeddings, precision="...") `'
'and pass `corpus_precision="..."` to `semantic_search_usearch`.'
)
query_embeddings = quantize_embeddings(
query_embeddings,
precision=corpus_precision,
ranges=ranges,
calibration_embeddings=calibration_embeddings,
)
elif rescore:
logger.warning(
"Rescoring is enabled but the query embeddings are quantized. Either pass `rescore=False` or don't quantize the query embeddings."
)
# Perform the search using the usearch index
start_t = time.time()
matches = corpus_index.search(query_embeddings, count=k, exact=exact)
scores = matches.distances
indices = matches.keys
if scores.ndim < 2:
scores = np.atleast_2d(scores)
if indices.ndim < 2:
indices = np.atleast_2d(indices)
# If rescoring is enabled, we need to rescore the results using the rescore_embeddings
if rescore_embeddings is not None:
top_k_embeddings = np.array([corpus_index.get(query_indices) for query_indices in indices])
# If the corpus precision is binary, we need to unpack the bits
if corpus_precision == "binary":
top_k_embeddings = np.unpackbits(top_k_embeddings.astype(np.uint8), axis=-1)
top_k_embeddings = top_k_embeddings.astype(int)
# rescore_embeddings: [num_queries, embedding_dim]
# top_k_embeddings: [num_queries, top_k, embedding_dim]
# updated_scores: [num_queries, top_k]
# We use einsum to calculate the dot product between the query and the top_k embeddings, equivalent to looping
# over the queries and calculating 'rescore_embeddings[i] @ top_k_embeddings[i].T'
rescored_scores = np.einsum("ij,ikj->ik", rescore_embeddings, top_k_embeddings)
rescored_indices = np.argsort(-rescored_scores)[:, :top_k]
indices = indices[np.arange(len(query_embeddings))[:, None], rescored_indices]
scores = rescored_scores[np.arange(len(query_embeddings))[:, None], rescored_indices]
delta_t = time.time() - start_t
outputs = (
[
[
{"corpus_id": int(neighbor), "score": float(score)}
for score, neighbor in zip(scores[query_id], indices[query_id])
]
for query_id in range(len(query_embeddings))
],
delta_t,
)
if output_index:
outputs = (*outputs, corpus_index)
return outputs
def quantize_embeddings(
embeddings: Union[Tensor, np.ndarray],
precision: Literal["float32", "int8", "uint8", "binary", "ubinary"],
ranges: Optional[np.ndarray] = None,
calibration_embeddings: Optional[np.ndarray] = None,
) -> np.ndarray:
"""
Quantizes embeddings to a lower precision. This can be used to reduce the memory footprint and increase the
speed of similarity search. The supported precisions are "float32", "int8", "uint8", "binary", and "ubinary".
:param embeddings: Unquantized (e.g. float) embeddings to quantize to a given precision
:param precision: The precision to convert to. Options are "float32", "int8", "uint8", "binary", "ubinary".
:param ranges: Ranges for quantization of embeddings. This is only used for int8 quantization, where the ranges
refers to the minimum and maximum values for each dimension. So, it's a 2D array with shape (2, embedding_dim).
Default is None, which means that the ranges will be calculated from the calibration embeddings.
:type ranges: Optional[np.ndarray]
:param calibration_embeddings: Embeddings used for calibration during quantization. This is only used for int8
quantization, where the calibration embeddings can be used to compute ranges, i.e. the minimum and maximum
values for each dimension. Default is None, which means that the ranges will be calculated from the query
embeddings. This is not recommended.
:type calibration_embeddings: Optional[np.ndarray]
:return: Quantized embeddings with the specified precision
"""
if isinstance(embeddings, Tensor):
embeddings = embeddings.cpu().numpy()
elif isinstance(embeddings, list):
if isinstance(embeddings[0], Tensor):
embeddings = [embedding.cpu().numpy() for embedding in embeddings]
embeddings = np.array(embeddings)
if embeddings.dtype in (np.uint8, np.int8):
raise ValueError("Embeddings to quantize must be float rather than int8 or uint8.")
if precision == "float32":
return embeddings.astype(np.float32)
if precision.endswith("int8"):
# Either use the 1. provided ranges, 2. the calibration dataset or 3. the provided embeddings
if ranges is None:
if calibration_embeddings is not None:
ranges = np.vstack((np.min(calibration_embeddings, axis=0), np.max(calibration_embeddings, axis=0)))
else:
if embeddings.shape[0] < 100:
logger.warning(
f"Computing {precision} quantization buckets based on {len(embeddings)} embedding{'s' if len(embeddings) != 1 else ''}."
f" {precision} quantization is more stable with `ranges` calculated from more embeddings "
"or a `calibration_embeddings` that can be used to calculate the buckets."
)
ranges = np.vstack((np.min(embeddings, axis=0), np.max(embeddings, axis=0)))
starts = ranges[0, :]
steps = (ranges[1, :] - ranges[0, :]) / 255
if precision == "uint8":
return ((embeddings - starts) / steps).astype(np.uint8)
elif precision == "int8":
return ((embeddings - starts) / steps - 128).astype(np.int8)
if precision == "binary":
return (np.packbits(embeddings > 0).reshape(embeddings.shape[0], -1) - 128).astype(np.int8)
if precision == "ubinary":
return np.packbits(embeddings > 0).reshape(embeddings.shape[0], -1)
raise ValueError(f"Precision {precision} is not supported")
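# Example (illustrative sketch, not part of the original file): quantize a float32
# corpus to packed binary ("ubinary") and search it with float32 queries, which
# triggers the rescoring path described above. Assumes faiss is installed; the
# embeddings are random stand-ins. semantic_search_usearch follows the same call
# shape with corpus_precision="binary".
demo_rng = np.random.default_rng(0)
demo_corpus = demo_rng.normal(size=(100, 32)).astype(np.float32)
demo_queries = demo_rng.normal(size=(2, 32)).astype(np.float32)
demo_ubinary_corpus = quantize_embeddings(demo_corpus, precision="ubinary")  # uint8, 32x smaller
demo_results, demo_search_time = semantic_search_faiss(
demo_queries, corpus_embeddings=demo_ubinary_corpus, corpus_precision="ubinary", top_k=5
)
print(demo_results[0][0])  # e.g. {'corpus_id': ..., 'score': ...}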
from typing import Union, List
class InputExample:
"""
Structure for one input example with texts, the label and a unique id
"""
def __init__(self, guid: str = "", texts: List[str] = None, label: Union[int, float] = 0):
"""
Creates one InputExample with the given texts, guid and label
:param guid
id for the example
:param texts
the texts for the example.
:param label
the label for the example
"""
self.guid = guid
self.texts = texts
self.label = label
def __str__(self):
return "<InputExample> label: {}, texts: {}".format(str(self.label), "; ".join(self.texts))
from . import InputExample
import os
class LabelSentenceReader:
"""Reads in a file that has at least two columns: a label and a sentence.
This reader can for example be used with the BatchHardTripletLoss.
Maps labels automatically to integers"""
def __init__(self, folder, label_col_idx=0, sentence_col_idx=1, separator="\t"):
self.folder = folder
self.label_map = {}
self.label_col_idx = label_col_idx
self.sentence_col_idx = sentence_col_idx
self.separator = separator
def get_examples(self, filename, max_examples=0):
examples = []
id = 0
with open(os.path.join(self.folder, filename), encoding="utf-8") as fIn:
for line in fIn:
splits = line.strip().split(self.separator)
label = splits[self.label_col_idx]
sentence = splits[self.sentence_col_idx]
if label not in self.label_map:
self.label_map[label] = len(self.label_map)
label_id = self.label_map[label]
guid = "%s-%d" % (filename, id)
id += 1
examples.append(InputExample(guid=guid, texts=[sentence], label=label_id))
if 0 < max_examples <= id:
break
return examples
from . import InputExample
import gzip
import os
class NLIDataReader(object):
"""
Reads in the Stanford NLI dataset and the MultiGenre NLI dataset
"""
def __init__(self, dataset_folder):
self.dataset_folder = dataset_folder
def get_examples(self, filename, max_examples=0):
"""
filename specifies which data split to use (train, dev, test).
Expects that self.dataset_folder contains the files s1.$data_split.gz, s2.$data_split.gz,
labels.$data_split.gz, e.g., for the train split, s1.train.gz, s2.train.gz, labels.train.gz
"""
s1 = gzip.open(os.path.join(self.dataset_folder, "s1." + filename), mode="rt", encoding="utf-8").readlines()
s2 = gzip.open(os.path.join(self.dataset_folder, "s2." + filename), mode="rt", encoding="utf-8").readlines()
labels = gzip.open(
os.path.join(self.dataset_folder, "labels." + filename), mode="rt", encoding="utf-8"
).readlines()
examples = []
id = 0
for sentence_a, sentence_b, label in zip(s1, s2, labels):
guid = "%s-%d" % (filename, id)
id += 1
examples.append(InputExample(guid=guid, texts=[sentence_a, sentence_b], label=self.map_label(label)))
if 0 < max_examples <= len(examples):
break
return examples
@staticmethod
def get_labels():
return {"contradiction": 0, "entailment": 1, "neutral": 2}
def get_num_labels(self):
return len(self.get_labels())
def map_label(self, label):
return self.get_labels()[label.strip().lower()]
from . import InputExample
import gzip
class PairedFilesReader(object):
"""
Reads in the a Pair Dataset, split in two files
"""
def __init__(self, filepaths):
self.filepaths = filepaths
def get_examples(self, max_examples=0):
""" """
fIns = []
for filepath in self.filepaths:
fIn = (
gzip.open(filepath, "rt", encoding="utf-8")
if filepath.endswith(".gz")
else open(filepath, encoding="utf-8")
)
fIns.append(fIn)
examples = []
eof = False
while not eof:
texts = []
for fIn in fIns:
text = fIn.readline()
if text == "":
eof = True
break
texts.append(text)
if eof:
break
examples.append(InputExample(guid=str(len(examples)), texts=texts, label=1))
if max_examples > 0 and len(examples) >= max_examples:
break
return examples
from . import InputExample
import csv
import gzip
import os
class STSDataReader:
"""
Reads in the STS dataset. Each line contains two sentences (s1_col_idx, s2_col_idx) and one label (score_col_idx)
Default values expects a tab separated file with the first & second column the sentence pair and third column the score (0...1). Default config normalizes scores from 0...5 to 0...1
"""
def __init__(
self,
dataset_folder,
s1_col_idx=0,
s2_col_idx=1,
score_col_idx=2,
delimiter="\t",
quoting=csv.QUOTE_NONE,
normalize_scores=True,
min_score=0,
max_score=5,
):
self.dataset_folder = dataset_folder
self.score_col_idx = score_col_idx
self.s1_col_idx = s1_col_idx
self.s2_col_idx = s2_col_idx
self.delimiter = delimiter
self.quoting = quoting
self.normalize_scores = normalize_scores
self.min_score = min_score
self.max_score = max_score
def get_examples(self, filename, max_examples=0):
"""
filename specifies which data split to use (train.csv, dev.csv, test.csv).
"""
filepath = os.path.join(self.dataset_folder, filename)
with gzip.open(filepath, "rt", encoding="utf8") if filename.endswith(".gz") else open(
filepath, encoding="utf-8"
) as fIn:
data = csv.reader(fIn, delimiter=self.delimiter, quoting=self.quoting)
examples = []
for id, row in enumerate(data):
score = float(row[self.score_col_idx])
if self.normalize_scores: # Normalize to a 0...1 value
score = (score - self.min_score) / (self.max_score - self.min_score)
s1 = row[self.s1_col_idx]
s2 = row[self.s2_col_idx]
examples.append(InputExample(guid=filename + str(id), texts=[s1, s2], label=score))
if max_examples > 0 and len(examples) >= max_examples:
break
return examples
class STSBenchmarkDataReader(STSDataReader):
"""
Reader especially for the STS benchmark dataset. There, the sentences are in column 5 and 6, the score is in column 4.
Scores are normalized from 0...5 to 0...1
"""
def __init__(
self,
dataset_folder,
s1_col_idx=5,
s2_col_idx=6,
score_col_idx=4,
delimiter="\t",
quoting=csv.QUOTE_NONE,
normalize_scores=True,
min_score=0,
max_score=5,
):
super().__init__(
dataset_folder=dataset_folder,
s1_col_idx=s1_col_idx,
s2_col_idx=s2_col_idx,
score_col_idx=score_col_idx,
delimiter=delimiter,
quoting=quoting,
normalize_scores=normalize_scores,
min_score=min_score,
max_score=max_score,
)
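# Example (illustrative sketch; "datasets/sts" and "train.csv" are hypothetical
# paths): scores are normalized from 0...5 to 0...1 by default.
demo_reader = STSDataReader("datasets/sts")
demo_examples = demo_reader.get_examples("train.csv", max_examples=10)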
from . import InputExample
import csv
import os
class TripletReader(object):
"""
Reads in the a Triplet Dataset: Each line contains (at least) 3 columns, one anchor column (s1),
one positive example (s2) and one negative example (s3)
"""
def __init__(
self,
dataset_folder,
s1_col_idx=0,
s2_col_idx=1,
s3_col_idx=2,
has_header=False,
delimiter="\t",
quoting=csv.QUOTE_NONE,
):
self.dataset_folder = dataset_folder
self.s1_col_idx = s1_col_idx
self.s2_col_idx = s2_col_idx
self.s3_col_idx = s3_col_idx
self.has_header = has_header
self.delimiter = delimiter
self.quoting = quoting
def get_examples(self, filename, max_examples=0):
""" """
data = csv.reader(
open(os.path.join(self.dataset_folder, filename), encoding="utf-8"),
delimiter=self.delimiter,
quoting=self.quoting,
)
examples = []
if self.has_header:
next(data)
for id, row in enumerate(data):
s1 = row[self.s1_col_idx]
s2 = row[self.s2_col_idx]
s3 = row[self.s3_col_idx]
examples.append(InputExample(texts=[s1, s2, s3]))
if max_examples > 0 and len(examples) >= max_examples:
break
return examples
from .InputExample import InputExample
from .LabelSentenceReader import LabelSentenceReader
from .NLIDataReader import NLIDataReader
from .STSDataReader import STSDataReader, STSBenchmarkDataReader
from .TripletReader import TripletReader
__all__ = [
"InputExample",
"LabelSentenceReader",
"NLIDataReader",
"STSDataReader",
"STSBenchmarkDataReader",
"TripletReader",
]
import functools
import requests
from torch import Tensor, device
from typing import List, Callable, Literal
from tqdm.autonotebook import tqdm
import sys
import importlib
import os
import torch
import numpy as np
import queue
import logging
from typing import Dict, Optional, Union, overload
from transformers import is_torch_npu_available
from huggingface_hub import snapshot_download, hf_hub_download
import heapq
logger = logging.getLogger(__name__)
def pytorch_cos_sim(a: Tensor, b: Tensor) -> Tensor:
"""
Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j.
:return: Matrix with res[i][j] = cos_sim(a[i], b[j])
"""
return cos_sim(a, b)
def cos_sim(a: Tensor, b: Tensor) -> Tensor:
"""
Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j.
:return: Matrix with res[i][j] = cos_sim(a[i], b[j])
"""
if not isinstance(a, torch.Tensor):
a = torch.tensor(a)
if not isinstance(b, torch.Tensor):
b = torch.tensor(b)
if len(a.shape) == 1:
a = a.unsqueeze(0)
if len(b.shape) == 1:
b = b.unsqueeze(0)
a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
return torch.mm(a_norm, b_norm.transpose(0, 1))
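# Example (illustrative sketch): cos_sim returns a [len(a), len(b)] similarity matrix.
demo_a = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
demo_b = torch.tensor([[1.0, 1.0]])
print(cos_sim(demo_a, demo_b))  # tensor([[0.7071], [0.7071]])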
def dot_score(a: Tensor, b: Tensor) -> Tensor:
"""
Computes the dot-product dot_prod(a[i], b[j]) for all i and j.
:return: Matrix with res[i][j] = dot_prod(a[i], b[j])
"""
if not isinstance(a, torch.Tensor):
a = torch.tensor(a)
if not isinstance(b, torch.Tensor):
b = torch.tensor(b)
if len(a.shape) == 1:
a = a.unsqueeze(0)
if len(b.shape) == 1:
b = b.unsqueeze(0)
return torch.mm(a, b.transpose(0, 1))
def pairwise_dot_score(a: Tensor, b: Tensor) -> Tensor:
"""
Computes the pairwise dot-product dot_prod(a[i], b[i])
:return: Vector with res[i] = dot_prod(a[i], b[i])
"""
if not isinstance(a, torch.Tensor):
a = torch.tensor(a)
if not isinstance(b, torch.Tensor):
b = torch.tensor(b)
return (a * b).sum(dim=-1)
def pairwise_cos_sim(a: Tensor, b: Tensor) -> Tensor:
"""
Computes the pairwise cossim cos_sim(a[i], b[i])
:return: Vector with res[i] = cos_sim(a[i], b[i])
"""
if not isinstance(a, torch.Tensor):
a = torch.tensor(a)
if not isinstance(b, torch.Tensor):
b = torch.tensor(b)
return pairwise_dot_score(normalize_embeddings(a), normalize_embeddings(b))
def pairwise_angle_sim(x: Tensor, y: Tensor) -> Tensor:
"""
Computes the absolute normalized angle distance;
see AnglELoss or https://arxiv.org/abs/2309.12871v1
for more information.
:return: Vector with res[i] = angle_sim(x[i], y[i])
"""
if not isinstance(x, torch.Tensor):
x = torch.tensor(x)
if not isinstance(y, torch.Tensor):
y = torch.tensor(y)
# modified from https://github.com/SeanLee97/AnglE/blob/main/angle_emb/angle.py
# chunk both tensors to obtain complex components
a, b = torch.chunk(x, 2, dim=1)
c, d = torch.chunk(y, 2, dim=1)
z = torch.sum(c**2 + d**2, dim=1, keepdim=True)
re = (a * c + b * d) / z
im = (b * c - a * d) / z
dz = torch.sum(a**2 + b**2, dim=1, keepdim=True) ** 0.5
dw = torch.sum(c**2 + d**2, dim=1, keepdim=True) ** 0.5
re /= dz / dw
im /= dz / dw
norm_angle = torch.sum(torch.concat((re, im), dim=1), dim=1)
return torch.abs(norm_angle)
def normalize_embeddings(embeddings: Tensor) -> Tensor:
"""
Normalizes the embeddings matrix, so that each sentence embedding has unit length
"""
return torch.nn.functional.normalize(embeddings, p=2, dim=1)
@overload
def truncate_embeddings(embeddings: np.ndarray, truncate_dim: Optional[int]) -> np.ndarray: ...
@overload
def truncate_embeddings(embeddings: torch.Tensor, truncate_dim: Optional[int]) -> torch.Tensor: ...
def truncate_embeddings(
embeddings: Union[np.ndarray, torch.Tensor], truncate_dim: Optional[int]
) -> Union[np.ndarray, torch.Tensor]:
"""
:param embeddings: Embeddings to truncate.
:param truncate_dim: The dimension to truncate sentence embeddings to. `None` does no truncation.
:return: Truncated embeddings.
"""
return embeddings[..., :truncate_dim]
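# Example (illustrative sketch): truncation just slices the trailing dimension,
# as used for Matryoshka-style embeddings.
demo_embeddings = np.ones((2, 768))
print(truncate_embeddings(demo_embeddings, 256).shape)  # (2, 256)
print(truncate_embeddings(demo_embeddings, None).shape)  # (2, 768); None disables truncation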
def paraphrase_mining(
model, sentences: List[str], show_progress_bar: bool = False, batch_size: int = 32, *args, **kwargs
) -> List[List[Union[float, int]]]:
"""
Given a list of sentences / texts, this function performs paraphrase mining. It compares all sentences against all
other sentences and returns a list with the pairs that have the highest cosine similarity score.
:param model: SentenceTransformer model for embedding computation
:param sentences: A list of strings (texts or sentences)
:param show_progress_bar: Whether to show a progress bar
:param batch_size: Number of texts that are encoded simultaneously by the model
:param query_chunk_size: Search for the most similar pairs for #query_chunk_size sentences at a time. Decrease to lower memory footprint (increases run-time).
:param corpus_chunk_size: Compare a sentence simultaneously against #corpus_chunk_size other sentences. Decrease to lower memory footprint (increases run-time).
:param max_pairs: Maximal number of text pairs returned.
:param top_k: For each sentence, we retrieve up to top_k other sentences
:param score_function: Function for computing scores. By default, cosine similarity.
:return: Returns a list of triplets with the format [score, id1, id2]
"""
# Compute embedding for the sentences
embeddings = model.encode(
sentences, show_progress_bar=show_progress_bar, batch_size=batch_size, convert_to_tensor=True
)
return paraphrase_mining_embeddings(embeddings, *args, **kwargs)
def paraphrase_mining_embeddings(
embeddings: Tensor,
query_chunk_size: int = 5000,
corpus_chunk_size: int = 100000,
max_pairs: int = 500000,
top_k: int = 100,
score_function: Callable[[Tensor, Tensor], Tensor] = cos_sim,
) -> List[List[Union[float, int]]]:
"""
Given a list of sentences / texts, this function performs paraphrase mining. It compares all sentences against all
other sentences and returns a list with the pairs that have the highest cosine similarity score.
:param embeddings: A tensor with the embeddings
:param query_chunk_size: Search for the most similar pairs for #query_chunk_size sentences at a time. Decrease to lower memory footprint (increases run-time).
:param corpus_chunk_size: Compare a sentence simultaneously against #corpus_chunk_size other sentences. Decrease to lower memory footprint (increases run-time).
:param max_pairs: Maximal number of text pairs returned.
:param top_k: For each sentence, we retrieve up to top_k other sentences
:param score_function: Function for computing scores. By default, cosine similarity.
:return: Returns a list of triplets with the format [score, id1, id2]
"""
top_k += 1  # A sentence has the highest similarity to itself. Increase top_k by 1 as we are interested in distinct pairs
# Mine for duplicates
pairs = queue.PriorityQueue()
min_score = -1
num_added = 0
for corpus_start_idx in range(0, len(embeddings), corpus_chunk_size):
for query_start_idx in range(0, len(embeddings), query_chunk_size):
scores = score_function(
embeddings[query_start_idx : query_start_idx + query_chunk_size],
embeddings[corpus_start_idx : corpus_start_idx + corpus_chunk_size],
)
scores_top_k_values, scores_top_k_idx = torch.topk(
scores, min(top_k, len(scores[0])), dim=1, largest=True, sorted=False
)
scores_top_k_values = scores_top_k_values.cpu().tolist()
scores_top_k_idx = scores_top_k_idx.cpu().tolist()
for query_itr in range(len(scores)):
for top_k_idx, corpus_itr in enumerate(scores_top_k_idx[query_itr]):
i = query_start_idx + query_itr
j = corpus_start_idx + corpus_itr
if i != j and scores_top_k_values[query_itr][top_k_idx] > min_score:
pairs.put((scores_top_k_values[query_itr][top_k_idx], i, j))
num_added += 1
if num_added >= max_pairs:
entry = pairs.get()
min_score = entry[0]
# Get the pairs
added_pairs = set() # Used for duplicate detection
pairs_list = []
while not pairs.empty():
score, i, j = pairs.get()
sorted_i, sorted_j = sorted([i, j])
if sorted_i != sorted_j and (sorted_i, sorted_j) not in added_pairs:
added_pairs.add((sorted_i, sorted_j))
pairs_list.append([score, sorted_i, sorted_j])
# Highest scores first
pairs_list = sorted(pairs_list, key=lambda x: x[0], reverse=True)
return pairs_list
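# Example (illustrative sketch with three toy embeddings): the closest pair
# (ids 0 and 1) is returned first as [score, id1, id2].
demo_embeddings = torch.tensor([[1.0, 0.0], [0.99, 0.1], [0.0, 1.0]])
print(paraphrase_mining_embeddings(demo_embeddings, top_k=2)[0])  # approx. [0.995, 0, 1]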
def information_retrieval(*args, **kwargs) -> List[List[Dict[str, Union[int, float]]]]:
"""This function is deprecated. Use semantic_search instead"""
return semantic_search(*args, **kwargs)
def semantic_search(
query_embeddings: Tensor,
corpus_embeddings: Tensor,
query_chunk_size: int = 100,
corpus_chunk_size: int = 500000,
top_k: int = 10,
score_function: Callable[[Tensor, Tensor], Tensor] = cos_sim,
) -> List[List[Dict[str, Union[int, float]]]]:
"""
This function performs a cosine similarity search between a list of query embeddings and a list of corpus embeddings.
It can be used for Information Retrieval / Semantic Search for corpora up to about 1 Million entries.
:param query_embeddings: A 2 dimensional tensor with the query embeddings.
:param corpus_embeddings: A 2 dimensional tensor with the corpus embeddings.
:param query_chunk_size: Process this many queries simultaneously (default: 100). Increasing that value increases the speed, but requires more memory.
:param corpus_chunk_size: Scan the corpus in chunks of this many entries (default: 500k). Increasing that value increases the speed, but requires more memory.
:param top_k: Retrieve top k matching entries.
:param score_function: Function for computing scores. By default, cosine similarity.
:return: Returns a list with one entry for each query. Each entry is a list of dictionaries with the keys 'corpus_id' and 'score', sorted by decreasing cosine similarity scores.
"""
if isinstance(query_embeddings, (np.ndarray, np.generic)):
query_embeddings = torch.from_numpy(query_embeddings)
elif isinstance(query_embeddings, list):
query_embeddings = torch.stack(query_embeddings)
if len(query_embeddings.shape) == 1:
query_embeddings = query_embeddings.unsqueeze(0)
if isinstance(corpus_embeddings, (np.ndarray, np.generic)):
corpus_embeddings = torch.from_numpy(corpus_embeddings)
elif isinstance(corpus_embeddings, list):
corpus_embeddings = torch.stack(corpus_embeddings)
# Check that corpus and queries are on the same device
if corpus_embeddings.device != query_embeddings.device:
query_embeddings = query_embeddings.to(corpus_embeddings.device)
queries_result_list = [[] for _ in range(len(query_embeddings))]
for query_start_idx in range(0, len(query_embeddings), query_chunk_size):
# Iterate over chunks of the corpus
for corpus_start_idx in range(0, len(corpus_embeddings), corpus_chunk_size):
# Compute cosine similarities
cos_scores = score_function(
query_embeddings[query_start_idx : query_start_idx + query_chunk_size],
corpus_embeddings[corpus_start_idx : corpus_start_idx + corpus_chunk_size],
)
# Get top-k scores
cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
cos_scores, min(top_k, len(cos_scores[0])), dim=1, largest=True, sorted=False
)
cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()
for query_itr in range(len(cos_scores)):
for sub_corpus_id, score in zip(cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr]):
corpus_id = corpus_start_idx + sub_corpus_id
query_id = query_start_idx + query_itr
if len(queries_result_list[query_id]) < top_k:
heapq.heappush(
queries_result_list[query_id], (score, corpus_id)
) # heapq orders entries by the first tuple element, i.e. the score
else:
heapq.heappushpop(queries_result_list[query_id], (score, corpus_id))
# change the data format and sort
for query_id in range(len(queries_result_list)):
for doc_itr in range(len(queries_result_list[query_id])):
score, corpus_id = queries_result_list[query_id][doc_itr]
queries_result_list[query_id][doc_itr] = {"corpus_id": corpus_id, "score": score}
queries_result_list[query_id] = sorted(queries_result_list[query_id], key=lambda x: x["score"], reverse=True)
return queries_result_list
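# Example (illustrative sketch with random stand-in embeddings): exhaustive
# top-k cosine search over an in-memory corpus.
demo_queries = torch.nn.functional.normalize(torch.randn(2, 64), p=2, dim=1)
demo_corpus = torch.nn.functional.normalize(torch.randn(100, 64), p=2, dim=1)
print(semantic_search(demo_queries, demo_corpus, top_k=3)[0])  # three hits, best first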
def http_get(url, path) -> None:
"""
Downloads a URL to a given path on disc
"""
if os.path.dirname(path) != "":
os.makedirs(os.path.dirname(path), exist_ok=True)
req = requests.get(url, stream=True)
if req.status_code != 200:
print("Exception when trying to download {}. Response {}".format(url, req.status_code), file=sys.stderr)
req.raise_for_status()
return
download_filepath = path + "_part"
with open(download_filepath, "wb") as file_binary:
content_length = req.headers.get("Content-Length")
total = int(content_length) if content_length is not None else None
progress = tqdm(unit="B", total=total, unit_scale=True)
for chunk in req.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
progress.update(len(chunk))
file_binary.write(chunk)
os.rename(download_filepath, path)
progress.close()
def batch_to_device(batch, target_device: device):
"""
send a pytorch batch to a device (CPU/GPU)
"""
for key in batch:
if isinstance(batch[key], Tensor):
batch[key] = batch[key].to(target_device)
return batch
def fullname(o) -> str:
"""
Gives a full name (package_name.class_name) for a class / object in Python. Will
be used to load the correct classes from JSON files
"""
module = o.__class__.__module__
if module is None or module == str.__class__.__module__:
return o.__class__.__name__ # Avoid reporting __builtin__
else:
return module + "." + o.__class__.__name__
def import_from_string(dotted_path):
"""
Import a dotted module path and return the attribute/class designated by the
last name in the path. Raise ImportError if the import failed.
"""
try:
module_path, class_name = dotted_path.rsplit(".", 1)
except ValueError:
msg = "%s doesn't look like a module path" % dotted_path
raise ImportError(msg)
try:
module = importlib.import_module(dotted_path)
except Exception:
module = importlib.import_module(module_path)
try:
return getattr(module, class_name)
except AttributeError:
msg = 'Module "%s" does not define a "%s" attribute/class' % (module_path, class_name)
raise ImportError(msg)
def community_detection(
embeddings, threshold=0.75, min_community_size=10, batch_size=1024, show_progress_bar=False
) -> List[List[int]]:
"""
Function for Fast Community Detection
Finds in the embeddings all communities, i.e. embeddings that are close (closer than threshold).
Returns only communities that are larger than min_community_size. The communities are returned
in decreasing order. The first element in each list is the central point in the community.
"""
if not isinstance(embeddings, torch.Tensor):
embeddings = torch.tensor(embeddings)
threshold = torch.tensor(threshold, device=embeddings.device)
embeddings = normalize_embeddings(embeddings)
extracted_communities = []
# Cap min_community_size (and thus the initial top-k size) at the number of embeddings
min_community_size = min(min_community_size, len(embeddings))
sort_max_size = min(max(2 * min_community_size, 50), len(embeddings))
for start_idx in tqdm(
range(0, len(embeddings), batch_size), desc="Finding clusters", disable=not show_progress_bar
):
# Compute cosine similarity scores
cos_scores = embeddings[start_idx : start_idx + batch_size] @ embeddings.T
# Use a torch-heavy approach if the embeddings are on CUDA, otherwise a loop-heavy one
if embeddings.device.type in ["cuda", "npu"]:
# Threshold the cos scores and determine how many close embeddings exist per embedding
threshold_mask = cos_scores >= threshold
row_wise_count = threshold_mask.sum(1)
# Only consider embeddings with enough close other embeddings
large_enough_mask = row_wise_count >= min_community_size
if not large_enough_mask.any():
continue
row_wise_count = row_wise_count[large_enough_mask]
cos_scores = cos_scores[large_enough_mask]
# The max is the largest potential community, so we use that in topk
k = row_wise_count.max()
_, top_k_indices = cos_scores.topk(k=k, largest=True)
# Use the row-wise count to slice the indices
for count, indices in zip(row_wise_count, top_k_indices):
extracted_communities.append(indices[:count].tolist())
else:
# Take the min_community_size largest similarities per row
top_k_values, _ = cos_scores.topk(k=min_community_size, largest=True)
# Filter for rows >= min_threshold
for i in range(len(top_k_values)):
if top_k_values[i][-1] >= threshold:
# Only check top k most similar entries
top_val_large, top_idx_large = cos_scores[i].topk(k=sort_max_size, largest=True)
# Check if we need to increase sort_max_size
while top_val_large[-1] > threshold and sort_max_size < len(embeddings):
sort_max_size = min(2 * sort_max_size, len(embeddings))
top_val_large, top_idx_large = cos_scores[i].topk(k=sort_max_size, largest=True)
extracted_communities.append(top_idx_large[top_val_large >= threshold].tolist())
# Largest cluster first
extracted_communities = sorted(extracted_communities, key=lambda x: len(x), reverse=True)
# Step 2) Remove overlapping communities
unique_communities = []
extracted_ids = set()
for cluster_id, community in enumerate(extracted_communities):
non_overlapped_community = []
for idx in community:
if idx not in extracted_ids:
non_overlapped_community.append(idx)
if len(non_overlapped_community) >= min_community_size:
unique_communities.append(non_overlapped_community)
extracted_ids.update(non_overlapped_community)
unique_communities = sorted(unique_communities, key=lambda x: len(x), reverse=True)
return unique_communities
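# Example (illustrative sketch): three tight clusters of five near-duplicate
# embeddings each; min_community_size is lowered to fit the toy data.
demo_centers = torch.randn(3, 1, 16)
demo_embeddings = (demo_centers + 0.01 * torch.randn(3, 5, 16)).reshape(15, 16)
demo_communities = community_detection(demo_embeddings, threshold=0.9, min_community_size=3)
print([len(c) for c in demo_communities])  # [5, 5, 5]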
class disabled_tqdm(tqdm):
"""
Class to override `disable` argument in case progress bars are globally disabled.
Taken from https://github.com/tqdm/tqdm/issues/619#issuecomment-619639324.
"""
def __init__(self, *args, **kwargs):
kwargs["disable"] = True
super().__init__(*args, **kwargs)
def __delattr__(self, attr: str) -> None:
"""Fix for https://github.com/huggingface/huggingface_hub/issues/1603"""
try:
super().__delattr__(attr)
except AttributeError:
if attr != "_lock":
raise
def is_sentence_transformer_model(
model_name_or_path: str,
token: Optional[Union[bool, str]] = None,
cache_folder: Optional[str] = None,
revision: Optional[str] = None,
) -> bool:
return bool(load_file_path(model_name_or_path, "modules.json", token, cache_folder, revision=revision))
def load_file_path(
model_name_or_path: str,
filename: str,
token: Optional[Union[bool, str]],
cache_folder: Optional[str],
revision: Optional[str] = None,
) -> Optional[str]:
# If file is local
file_path = os.path.join(model_name_or_path, filename)
if os.path.exists(file_path):
return file_path
# If file is remote
try:
return hf_hub_download(
model_name_or_path,
filename=filename,
revision=revision,
library_name="sentence-transformers",
token=token,
cache_dir=cache_folder,
)
except Exception:
return
def load_dir_path(
model_name_or_path: str,
directory: str,
token: Optional[Union[bool, str]],
cache_folder: Optional[str],
revision: Optional[str] = None,
) -> Optional[str]:
# If file is local
dir_path = os.path.join(model_name_or_path, directory)
if os.path.exists(dir_path):
return dir_path
download_kwargs = {
"repo_id": model_name_or_path,
"revision": revision,
"allow_patterns": f"{directory}/**",
"library_name": "sentence-transformers",
"token": token,
"cache_dir": cache_folder,
"tqdm_class": disabled_tqdm,
}
# Try to download from the remote
try:
repo_path = snapshot_download(**download_kwargs)
except Exception:
# Otherwise, try local (i.e. cache) only
download_kwargs["local_files_only"] = True
repo_path = snapshot_download(**download_kwargs)
return os.path.join(repo_path, directory)
def save_to_hub_args_decorator(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
# If repo_id not already set, use repo_name
repo_name = kwargs.pop("repo_name", None)
if repo_name and "repo_id" not in kwargs:
logger.warning(
"Providing a `repo_name` keyword argument to `save_to_hub` is deprecated, please use `repo_id` instead."
)
kwargs["repo_id"] = repo_name
# If positional args are used, adjust for the new "token" keyword argument
if len(args) >= 2:
args = (*args[:2], None, *args[2:])
return func(self, *args, **kwargs)
return wrapper
def get_device_name() -> Literal["mps", "cuda", "npu", "hpu", "cpu"]:
"""
Returns the name of the device where this module is running on.
This is a simple implementation that doesn't cover cases when more powerful GPUs are available but
are not the primary device ('cuda:0'), or when an MPS device is available but not configured properly:
https://pytorch.org/docs/master/notes/mps.html
:return: Device name, like 'cuda' or 'cpu'
"""
if torch.cuda.is_available():
return "cuda"
elif torch.backends.mps.is_available():
return "mps"
elif is_torch_npu_available():
return "npu"
elif importlib.util.find_spec("habana_frameworks") is not None:
import habana_frameworks.torch.hpu as hthpu
if hthpu.is_available():
return "hpu"
return "cpu"
[metadata]
description_file = README.md
from setuptools import setup, find_packages
with open("README.md", mode="r", encoding="utf-8") as readme_file:
readme = readme_file.read()
setup(
name="sentence-transformers",
version="2.7.0.dev0",
author="Nils Reimers",
author_email="info@nils-reimers.de",
description="Multilingual text embeddings",
long_description=readme,
long_description_content_type="text/markdown",
license="Apache License 2.0",
url="https://www.SBERT.net",
download_url="https://github.com/UKPLab/sentence-transformers/",
packages=find_packages(),
python_requires=">=3.8.0",
install_requires=[
"transformers>=4.32.0,<5.0.0",
"tqdm",
"torch>=1.11.0",
"numpy",
"scikit-learn",
"scipy",
"huggingface-hub>=0.15.1",
"Pillow",
],
extras_require={
"dev": [
"pre-commit",
"pytest",
"ruff>=0.3.0",
],
},
classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.8",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
keywords="Transformer Networks BERT XLNet sentence embedding PyTorch NLP deep learning",
)
import os
import platform
import tempfile
import pytest
from sentence_transformers import SentenceTransformer, CrossEncoder
from sentence_transformers.models import Transformer, Pooling
@pytest.fixture()
def stsb_bert_tiny_model() -> SentenceTransformer:
return SentenceTransformer("sentence-transformers-testing/stsb-bert-tiny-safetensors")
@pytest.fixture(scope="session")
def stsb_bert_tiny_model_reused() -> SentenceTransformer:
return SentenceTransformer("sentence-transformers-testing/stsb-bert-tiny-safetensors")
@pytest.fixture()
def paraphrase_distilroberta_base_v1_model() -> SentenceTransformer:
return SentenceTransformer("paraphrase-distilroberta-base-v1")
@pytest.fixture()
def distilroberta_base_ce_model() -> CrossEncoder:
return CrossEncoder("distilroberta-base", num_labels=1)
@pytest.fixture()
def clip_vit_b_32_model() -> SentenceTransformer:
return SentenceTransformer("clip-ViT-B-32")
@pytest.fixture()
def distilbert_base_uncased_model() -> SentenceTransformer:
word_embedding_model = Transformer("distilbert-base-uncased")
pooling_model = Pooling(word_embedding_model.get_word_embedding_dimension())
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
return model
@pytest.fixture()
def cache_dir():
"""
In the CI environment, we use a temporary directory as `cache_dir`
to avoid keeping the downloaded models on disk after the test.
This is only required for Ubuntu, as we otherwise have disk space issues there.
"""
if os.environ.get("CI", None) and platform.system() == "Linux":
with tempfile.TemporaryDirectory() as tmp_dir:
yield tmp_dir
else:
yield None
from contextlib import nullcontext
from typing import List
import pytest
from sentence_transformers import SentenceTransformer, InputExample, losses
import tqdm
from transformers import set_seed
import torch
from torch.optim import Adam
@pytest.mark.parametrize(
["train_samples_mnrl", "train_samples_cmnrl", "same_grad", "scaler", "precision"],
[
(
[
InputExample(texts=[q, p, n])
for q, p, n in zip(
["aaa", "bbb", "ccc", "ddd", "eee"],
["aas", "bbs", "ccs", "dds", "ees"],
["xxx", "yyy", "zzz", "kkk", "fff"],
)
],
[
InputExample(texts=[q, p, n])
for q, p, n in zip(
["aaa", "bbb", "ccc", "ddd", "eee"],
["aas", "bbs", "ccs", "dds", "ees"],
["xxx", "yyy", "zzz", "kkk", "fff"],
)
],
True,
1.0,
1e-6,
),
(
[
InputExample(texts=[q, p, n])
for q, p, n in zip(
["adsa", "czx", "dsada"],
["b", "fas", "xcz"],
["c", "yyy", "asdas"],
)
],
[
InputExample(texts=[q, p, n])
for q, p, n in zip(
["aaa", "bbb", "ccc", "ddd", "eee"],
["aas", "bbs", "ccs", "dds", "ees"],
["xxx", "yyy", "zzz", "kkk", "fff"],
)
],
False,
1.0,
1e-6,
),
(
[
InputExample(texts=[q, p, n])
for q, p, n in zip(
["aaa", "bbb", "ccc", "ddd", "eee"],
["aas", "bbs", "ccs", "dds", "ees"],
["xxx", "yyy", "zzz", "kkk", "fff"],
)
],
[
InputExample(texts=[q, p, n])
for q, p, n in zip(
["aaa", "bbb", "ccc", "ddd", "eee"],
["aas", "bbs", "ccs", "dds", "ees"],
["xxx", "yyy", "zzz", "kkk", "fff"],
)
],
True,
1000.0,
1e-3,
),
],
)
def test_cmnrl_same_grad(
train_samples_mnrl: List[InputExample],
train_samples_cmnrl: List[InputExample],
same_grad: bool,
scaler: float,
precision: float,
):
# Given:
sbert = SentenceTransformer("distilbert-base-uncased")
sbert.to("cpu")
optimizer = Adam(sbert.parameters())
# train_samples_mnrl
# train_samples_cmnrl
# same_grad
# scaler # This simulates AMP scenarios
# precision
# When:
# First run with MNRL
set_seed(42)
optimizer.zero_grad()
loss_mnrl = losses.MultipleNegativesRankingLoss(sbert)
loss_mnrl_value: torch.Tensor = loss_mnrl.forward(*sbert.smart_batching_collate(train_samples_mnrl)) * scaler
loss_mnrl_value.backward()
grad_expected = {name: p.grad.clone() for name, p in loss_mnrl.named_parameters() if p.grad is not None}
# Then run with this cached version:
set_seed(42)
optimizer.zero_grad()
loss_cmnrl = losses.CachedMultipleNegativesRankingLoss(sbert, mini_batch_size=2)
loss_cmnrl_value = loss_cmnrl.forward(*sbert.smart_batching_collate(train_samples_cmnrl)) * scaler
loss_cmnrl_value.backward()
grad = {name: p.grad.clone() for name, p in loss_cmnrl.named_parameters() if p.grad is not None}
# Then:
if same_grad:
assert pytest.approx(loss_mnrl_value.item()) == loss_cmnrl_value.item()
else:
assert pytest.approx(loss_mnrl_value.item()) != loss_cmnrl_value.item()
nclose = 0
for name in tqdm.tqdm(grad_expected):
nclose += torch.allclose(grad[name], grad_expected[name], precision, precision)
if same_grad:
assert nclose == len(grad_expected)
else:
assert nclose != len(grad_expected)
@pytest.mark.parametrize("use_rand_context", [True, False])
def test_rand_context_working(use_rand_context: bool):
# Given:
from sentence_transformers.losses.CachedMultipleNegativesRankingLoss import (
RandContext,
)
a = torch.Tensor(1)
b = torch.Tensor(1)
random_state = RandContext(a, b) if use_rand_context else nullcontext()
expected = torch.rand(1000)
precision = 1e-6
# When:
with random_state:
# Then:
if use_rand_context:
assert torch.allclose(torch.rand(1000), expected, precision, precision)
else:
assert not torch.allclose(torch.rand(1000), expected, precision, precision)
"""
Computes embeddings
"""
import numpy as np
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import get_device_name
def test_encode_token_embeddings(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
"""
Test that encode(output_value='token_embeddings') works
:return:
"""
model = paraphrase_distilroberta_base_v1_model
sent = [
"Hello Word, a test sentence",
"Here comes another sentence",
"My final sentence",
"Sentences",
"Sentence five five five five five five five",
]
emb = model.encode(sent, output_value="token_embeddings", batch_size=2)
assert len(emb) == len(sent)
device = get_device_name()
if device == "hpu":
for s, e in zip(sent, emb):
assert len(model.tokenize([s])["input_ids"][0]) == model.get_max_seq_length()
else:
for s, e in zip(sent, emb):
assert len(model.tokenize([s])["input_ids"][0]) == e.shape[0]
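# Sketch (assumption, not part of the test suite): token embeddings are commonly
# reduced to a single sentence vector by mean pooling over the token axis; this
# helper shows the idea for one text.
def example_mean_pool_tokens(model: SentenceTransformer, text: str) -> np.ndarray:
    token_embeddings = model.encode(text, output_value="token_embeddings")  # (num_tokens, dim)
    return token_embeddings.mean(dim=0).cpu().numpy()  # (dim,)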
def test_encode_single_sentences(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
model = paraphrase_distilroberta_base_v1_model
# Single sentence
emb = model.encode("Hello Word, a test sentence")
assert emb.shape == (768,)
assert abs(np.sum(emb) - 7.9811716) < 0.002
# Single sentence as list
emb = model.encode(["Hello Word, a test sentence"])
assert emb.shape == (1, 768)
assert abs(np.sum(emb) - 7.9811716) < 0.002
# Sentence list
emb = model.encode(
[
"Hello Word, a test sentence",
"Here comes another sentence",
"My final sentence",
]
)
assert emb.shape == (3, 768)
assert abs(np.sum(emb) - 22.968266) < 0.007
def test_encode_normalize(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
model = paraphrase_distilroberta_base_v1_model
emb = model.encode(
[
"Hello Word, a test sentence",
"Here comes another sentence",
"My final sentence",
],
normalize_embeddings=True,
)
assert emb.shape == (3, 768)
for norm in np.linalg.norm(emb, axis=1):
assert abs(norm - 1) < 0.001
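# Follow-on sketch (assumption): with normalize_embeddings=True, cosine similarity
# reduces to a plain dot product, so util.cos_sim and util.dot_score agree.
def example_normalized_dot_equals_cosine(model: SentenceTransformer) -> None:
    from sentence_transformers import util
    emb = model.encode(["first sentence", "second sentence"], normalize_embeddings=True)
    cosine = float(util.cos_sim(emb[0], emb[1]))
    dot = float(util.dot_score(emb[0], emb[1]))
    assert abs(cosine - dot) < 1e-5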
def test_encode_tuple_sentences(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
model = paraphrase_distilroberta_base_v1_model
# Input a sentence tuple
emb = model.encode([("Hello Word, a test sentence", "Second input for model")])
assert emb.shape == (1, 768)
assert abs(np.sum(emb) - 9.503508) < 0.002
# List of sentence tuples
emb = model.encode(
[
("Hello Word, a test sentence", "Second input for model"),
("My second tuple", "With two inputs"),
("Final tuple", "final test"),
]
)
assert emb.shape == (3, 768)
assert abs(np.sum(emb) - 32.14627) < 0.002
"""
Tests that the pretrained models produce the correct scores on the STSbenchmark dataset
"""
import csv
import gzip
import os
from pathlib import Path
import tempfile
import pytest
import torch
from torch.utils.data import DataLoader
from sentence_transformers import CrossEncoder, util
from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator
from sentence_transformers.readers import InputExample
from typing import Generator, List, Optional, Tuple
@pytest.fixture()
def sts_resource() -> Generator[Tuple[List[InputExample], List[InputExample]], None, None]:
sts_dataset_path = "datasets/stsbenchmark.tsv.gz"
if not os.path.exists(sts_dataset_path):
util.http_get("https://sbert.net/datasets/stsbenchmark.tsv.gz", sts_dataset_path)
stsb_train_samples = []
stsb_test_samples = []
with gzip.open(sts_dataset_path, "rt", encoding="utf8") as fIn:
reader = csv.DictReader(fIn, delimiter="\t", quoting=csv.QUOTE_NONE)
for row in reader:
score = float(row["score"]) / 5.0 # Normalize score to range 0 ... 1
inp_example = InputExample(texts=[row["sentence1"], row["sentence2"]], label=score)
if row["split"] == "test":
stsb_test_samples.append(inp_example)
elif row["split"] == "train":
stsb_train_samples.append(inp_example)
yield stsb_train_samples, stsb_test_samples
def evaluate_stsb_test(
distilroberta_base_ce_model: CrossEncoder,
expected_score: float,
test_samples: List[InputExample],
num_test_samples: int = -1,
) -> None:
model = distilroberta_base_ce_model
evaluator = CECorrelationEvaluator.from_input_examples(test_samples[:num_test_samples], name="sts-test")
score = evaluator(model) * 100
print("STS-Test Performance: {:.2f} vs. exp: {:.2f}".format(score, expected_score))
assert score > expected_score or abs(score - expected_score) < 0.1
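# Sketch (assumption about the metric): CECorrelationEvaluator scores a model by the
# Spearman rank correlation between predicted scores and gold labels; computed by
# hand it would look roughly like this.
def example_spearman_by_hand(model: CrossEncoder, samples: List[InputExample]) -> float:
    from scipy.stats import spearmanr
    predictions = model.predict([sample.texts for sample in samples])
    labels = [sample.label for sample in samples]
    return spearmanr(predictions, labels).correlation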
def test_pretrained_stsb(sts_resource: Tuple[List[InputExample], List[InputExample]]):
_, sts_test_samples = sts_resource
model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
evaluate_stsb_test(model, 87.92, sts_test_samples)
@pytest.mark.slow
def test_train_stsb_slow(
distilroberta_base_ce_model: CrossEncoder, sts_resource: Tuple[List[InputExample], List[InputExample]]
) -> None:
model = distilroberta_base_ce_model
sts_train_samples, sts_test_samples = sts_resource
train_dataloader = DataLoader(sts_train_samples, shuffle=True, batch_size=16)
model.fit(
train_dataloader=train_dataloader,
epochs=1,
warmup_steps=int(len(train_dataloader) * 0.1),
)
evaluate_stsb_test(model, 75, sts_test_samples)
def test_train_stsb(
distilroberta_base_ce_model: CrossEncoder, sts_resource: Tuple[List[InputExample], List[InputExample]]
) -> None:
model = distilroberta_base_ce_model
sts_train_samples, sts_test_samples = sts_resource
train_dataloader = DataLoader(sts_train_samples[:500], shuffle=True, batch_size=16)
model.fit(
train_dataloader=train_dataloader,
epochs=1,
warmup_steps=int(len(train_dataloader) * 0.1),
)
evaluate_stsb_test(model, 50, sts_test_samples, num_test_samples=100)
def test_classifier_dropout_is_set() -> None:
model = CrossEncoder("cross-encoder/stsb-distilroberta-base", classifier_dropout=0.1234)
assert model.config.classifier_dropout == 0.1234
assert model.model.config.classifier_dropout == 0.1234
def test_classifier_dropout_default_value() -> None:
model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
assert model.config.classifier_dropout is None
assert model.model.config.classifier_dropout is None
def test_load_with_revision() -> None:
model_name = "sentence-transformers-testing/stsb-bert-tiny-safetensors"
main_model = CrossEncoder(model_name, num_labels=1, revision="main")
latest_model = CrossEncoder(
model_name,
num_labels=1,
revision="f3cb857cba53019a20df283396bcca179cf051a4",
)
older_model = CrossEncoder(
model_name,
num_labels=1,
revision="ba33022fdf0b0fc2643263f0726f44d0a07d0e24",
)
    # Set the classifier.bias and classifier.weight equal across the models. This is
    # needed because AutoModelForSequenceClassification randomly initializes the
    # classifier head on every model initialization, so predictions are only
    # comparable if all models share the same classifier parameters.
latest_model.model.classifier.bias = main_model.model.classifier.bias
latest_model.model.classifier.weight = main_model.model.classifier.weight
older_model.model.classifier.bias = main_model.model.classifier.bias
older_model.model.classifier.weight = main_model.model.classifier.weight
test_sentences = [["Hello there!", "Hello, World!"]]
main_prob = main_model.predict(test_sentences, convert_to_tensor=True)
assert torch.equal(main_prob, latest_model.predict(test_sentences, convert_to_tensor=True))
assert not torch.equal(main_prob, older_model.predict(test_sentences, convert_to_tensor=True))
def test_rank() -> None:
model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
# We want to compute the similarity between the query sentence
query = "A man is eating pasta."
# With all sentences in the corpus
corpus = [
"A man is eating food.",
"A man is eating a piece of bread.",
"The girl is carrying a baby.",
"A man is riding a horse.",
"A woman is playing violin.",
"Two men pushed carts through the woods.",
"A man is riding a white horse on an enclosed ground.",
"A monkey is playing drums.",
"A cheetah is running behind its prey.",
]
expected_ranking = [0, 1, 3, 6, 2, 5, 7, 4, 8]
# 1. We rank all sentences in the corpus for the query
ranks = model.rank(query, corpus)
pred_ranking = [rank["corpus_id"] for rank in ranks]
assert pred_ranking == expected_ranking
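# Sketch (assumption): rank() is equivalent to scoring each (query, document) pair
# with predict() and sorting by descending score; this reproduces the ranking by hand.
def example_rank_by_hand(model: CrossEncoder, query: str, corpus: List[str]) -> List[int]:
    scores = model.predict([[query, document] for document in corpus])
    return sorted(range(len(corpus)), key=lambda i: scores[i], reverse=True)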
@pytest.mark.parametrize("safe_serialization", [True, False, None])
def test_safe_serialization(safe_serialization: Optional[bool]) -> None:
with tempfile.TemporaryDirectory() as cache_folder:
model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
if safe_serialization:
model.save(cache_folder, safe_serialization=safe_serialization)
model_files = list(Path(cache_folder).glob("**/model.safetensors"))
assert 1 == len(model_files)
elif safe_serialization is None:
model.save(cache_folder)
model_files = list(Path(cache_folder).glob("**/model.safetensors"))
assert 1 == len(model_files)
else:
model.save(cache_folder, safe_serialization=safe_serialization)
model_files = list(Path(cache_folder).glob("**/pytorch_model.bin"))
assert 1 == len(model_files)
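# Follow-on sketch (assumption): a model saved to a folder with either serialization
# format can be reloaded by passing the local path to the CrossEncoder constructor.
def example_save_and_reload() -> None:
    with tempfile.TemporaryDirectory() as tmp_dir:
        model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
        model.save(tmp_dir)  # safetensors by default, as asserted above
        reloaded = CrossEncoder(tmp_dir)
        assert reloaded.predict([["Hello there!", "Hello, World!"]]).shape == (1,)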
"""
Tests the correct computation of evaluation scores from BinaryClassificationEvaluator
"""
import csv
import gzip
import os
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from torch.utils.data import DataLoader
from sentence_transformers import (
InputExample,
SentenceTransformer,
evaluation,
losses,
util,
)
def test_BinaryClassificationEvaluator_find_best_f1_and_threshold() -> None:
"""Tests that the F1 score for the computed threshold is correct"""
y_true = np.random.randint(0, 2, 1000)
y_pred_cosine = np.random.randn(1000)
(
best_f1,
best_precision,
best_recall,
threshold,
) = evaluation.BinaryClassificationEvaluator.find_best_f1_and_threshold(
y_pred_cosine, y_true, high_score_more_similar=True
)
y_pred_labels = [1 if pred >= threshold else 0 for pred in y_pred_cosine]
sklearn_f1score = f1_score(y_true, y_pred_labels)
assert np.abs(best_f1 - sklearn_f1score) < 1e-6
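# Reference sketch (assumption about the algorithm, not the library's internals): the
# best F1 threshold can be found by sweeping every candidate score as a cut-off and
# keeping the one with the highest F1 for "predict 1 at or above the cut".
def brute_force_best_f1(scores: np.ndarray, labels: np.ndarray):
    best_f1, best_threshold = 0.0, 0.0
    for threshold in np.unique(scores):
        predictions = (scores >= threshold).astype(int)
        score = f1_score(labels, predictions)
        if score > best_f1:
            best_f1, best_threshold = score, threshold
    return best_f1, best_threshold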
def test_BinaryClassificationEvaluator_find_best_accuracy_and_threshold() -> None:
"""Tests that the Acc score for the computed threshold is correct"""
y_true = np.random.randint(0, 2, 1000)
y_pred_cosine = np.random.randn(1000)
(
max_acc,
threshold,
) = evaluation.BinaryClassificationEvaluator.find_best_acc_and_threshold(
y_pred_cosine, y_true, high_score_more_similar=True
)
y_pred_labels = [1 if pred >= threshold else 0 for pred in y_pred_cosine]
sklearn_acc = accuracy_score(y_true, y_pred_labels)
assert np.abs(max_acc - sklearn_acc) < 1e-6
def test_LabelAccuracyEvaluator(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
"""Tests that the LabelAccuracyEvaluator can be loaded correctly"""
model = paraphrase_distilroberta_base_v1_model
nli_dataset_path = "datasets/AllNLI.tsv.gz"
if not os.path.exists(nli_dataset_path):
util.http_get("https://sbert.net/datasets/AllNLI.tsv.gz", nli_dataset_path)
label2int = {"contradiction": 0, "entailment": 1, "neutral": 2}
dev_samples = []
with gzip.open(nli_dataset_path, "rt", encoding="utf8") as fIn:
reader = csv.DictReader(fIn, delimiter="\t", quoting=csv.QUOTE_NONE)
for row in reader:
if row["split"] == "train":
label_id = label2int[row["label"]]
dev_samples.append(InputExample(texts=[row["sentence1"], row["sentence2"]], label=label_id))
if len(dev_samples) >= 100:
break
train_loss = losses.SoftmaxLoss(
model=model,
sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
num_labels=len(label2int),
)
dev_dataloader = DataLoader(dev_samples, shuffle=False, batch_size=16)
evaluator = evaluation.LabelAccuracyEvaluator(dev_dataloader, softmax_model=train_loss)
acc = evaluator(model)
assert acc > 0.2
def test_ParaphraseMiningEvaluator(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
"""Tests that the ParaphraseMiningEvaluator can be loaded"""
model = paraphrase_distilroberta_base_v1_model
sentences = {
0: "Hello World",
1: "Hello World!",
2: "The cat is on the table",
3: "On the table the cat is",
}
data_eval = evaluation.ParaphraseMiningEvaluator(sentences, [(0, 1), (2, 3)])
score = data_eval(model)
assert score > 0.99
"""
Compute image embeddings
"""
import os
from PIL import Image
from sentence_transformers import util, SentenceTransformer
def test_simple_encode(clip_vit_b_32_model: SentenceTransformer) -> None:
model = clip_vit_b_32_model
# Encode an image:
image_filepath = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"../examples/applications/image-search/two_dogs_in_snow.jpg",
)
img_emb = model.encode(Image.open(image_filepath))
# Encode text descriptions
text_emb = model.encode(["Two dogs in the snow", "A cat on a table", "A picture of London at night"])
# Compute cosine similarities
cos_scores = util.cos_sim(img_emb, text_emb)[0]
assert abs(cos_scores[0] - 0.3069) < 0.01
assert abs(cos_scores[1] - 0.1010) < 0.01
assert abs(cos_scores[2] - 0.1086) < 0.01
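# Follow-on sketch (assumption): because CLIP embeds images and texts into the same
# vector space, the same util.cos_sim call also works image-to-image.
def example_image_image_similarity(model: SentenceTransformer, path_a: str, path_b: str) -> float:
    embeddings = model.encode([Image.open(path_a), Image.open(path_b)])
    return float(util.cos_sim(embeddings[0], embeddings[1]))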