Commit 0fccd232 authored by Rayyyyy

First add

!function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}});
\ No newline at end of file
!function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("<div class='wy-table-responsive'></div>"),n("table.docutils.footnote").wrap("<div class='wy-table-responsive footnote'></div>"),n("table.docutils.citation").wrap("<div class='wy-table-responsive citation'></div>"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n('<span class="toctree-expand"></span>'),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}t.length>0&&($(".wy-menu-vertical .current").removeClass("current"),t.addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l1").parent().addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l2").addClass("current"),t.closest("li.toctree-l3").addClass("current"),t.closest("li.toctree-l4").addClass("current"),t.closest("li.toctree-l5").addClass("current"),t[0].scrollIntoView())}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t<e.length&&!window.requestAnimationFrame;++t)window.requestAnimationFrame=window[e[t]+"RequestAnimationFrame"],window.cancelAnimationFrame=window[e[t]+"CancelAnimationFrame"]||window[e[t]+"CancelRequestAnimationFrame"];window.requestAnimationFrame||(window.requestAnimationFrame=function(e,t){var i=(new Date).getTime(),o=Math.max(0,16-(i-n)),r=window.setTimeout((function(){e(i+o)}),o);return n=i+o,r}),window.cancelAnimationFrame||(window.cancelAnimationFrame=function(n){clearTimeout(n)})}()}).call(window)},function(n,e){n.exports=jQuery},function(n,e,t){}]);
\ No newline at end of file
[theme]
inherit = basic
stylesheet = css/theme.css
pygments_style = default
[options]
canonical_url =
analytics_id =
collapse_navigation = True
sticky_navigation = True
navigation_depth = 4
includehidden = True
titles_only =
logo_only =
display_version = True
prev_next_buttons_location = bottom
style_external_links = False
style_nav_header_background =
\ No newline at end of file
{% if READTHEDOCS %}
{# Add rst-badge after rst-versions for small badge style. #}
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<span class="rst-current-version" data-toggle="rst-current-version">
<span class="fa fa-book"> Read the Docs</span>
v: {{ current_version }}
<span class="fa fa-caret-down"></span>
</span>
<div class="rst-other-versions">
<dl>
<dt>{{ _('Versions') }}</dt>
{% for slug, url in versions %}
<dd><a href="{{ url }}">{{ slug }}</a></dd>
{% endfor %}
</dl>
<dl>
<dt>{{ _('Downloads') }}</dt>
{% for type, url in downloads %}
<dd><a href="{{ url }}">{{ type }}</a></dd>
{% endfor %}
</dl>
<dl>
{# Translators: The phrase "Read the Docs" is not translated #}
<dt>{{ _('On Read the Docs') }}</dt>
<dd>
<a href="//{{ PRODUCTION_DOMAIN }}/projects/{{ slug }}/?fromdocs={{ slug }}">{{ _('Project Home') }}</a>
</dd>
<dd>
<a href="//{{ PRODUCTION_DOMAIN }}/builds/{{ slug }}/?fromdocs={{ slug }}">{{ _('Builds') }}</a>
</dd>
</dl>
</div>
</div>
{% endif %}
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
from recommonmark.transform import AutoStructify
import os
from sphinx.domains import Domain
import datetime
# -- Project information -----------------------------------------------------
project = "Sentence-Transformers"
copyright = str(datetime.datetime.now().year) + ", Nils Reimers"
author = "Nils Reimers"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ["sphinx.ext.autodoc", "recommonmark", "sphinx_markdown_tables"]
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "nr_examples"]
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
html_theme_path = ["_themes"]
html_theme_options = {"logo_only": True, "canonical_url": "https://www.sbert.net"}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
html_css_files = [
    "css/custom.css",
]
html_js_files = [
    "js/custom.js",
]
html_show_sourcelink = False
html_context = {
    "display_github": True,
    "github_user": "UKPLab",
    "github_repo": "sentence-transformers",
    "github_version": "master/",
}
html_logo = "img/logo.png"
html_favicon = "img/favicon.ico"
autoclass_content = "both"
class GithubURLDomain(Domain):
    """
    Resolve .py links to their respective Github URL
    """

    name = "githuburl"
    ROOT = "https://github.com/UKPLab/sentence-transformers/tree/master"

    def resolve_any_xref(self, env, fromdocname, builder, target, node, contnode):
        # Rewrite relative links to .py / .ipynb files so they point at the
        # corresponding file on GitHub instead of a non-existent doc page.
        if (target.endswith(".py") or target.endswith(".ipynb")) and not target.startswith("http"):
            from_folder = os.path.dirname(fromdocname)
            contnode["refuri"] = "/".join([self.ROOT, from_folder, target])
            return [("githuburl:any", contnode)]
        return []


def setup(app):
    app.add_domain(GithubURLDomain)
    app.add_config_value(
        "recommonmark_config",
        {
            # 'url_resolver': lambda url: github_doc_root + url,
            "auto_toc_tree_section": "Contents",
        },
        True,
    )
    app.add_transform(AutoStructify)
# Contact
In case of questions, feel free to open a [Github Issue](https://github.com/UKPLab/sentence-transformers/issues) or write me an email: [info@nils-reimers.de](mailto:info@nils-reimers.de).
**SentenceTransformers is maintained by:**
Nils Reimers
Ubiquitous Knowledge Processing (UKP) Lab
FB 20 / Department of Computer Science
Technische Universität Darmstadt
Hochschulstr. 10
64289 Darmstadt
Germany
[Website](https://www.informatik.tu-darmstadt.de/ukp/ukp_home/index.en.jsp)
**Privacy Policy**
The webserver / web hosting company might collect certain log files to prevent abuse of services. These log files can include: IP address, URL, date and time.
We do not use any tracking services or cookies to track or re-identify visitors.
\ No newline at end of file
# Hugging Face 🤗
## The Hugging Face Hub
In addition to the official [pre-trained models](https://www.sbert.net/docs/pretrained_models.html), you can find over 500 `sentence-transformers` models on the [Hugging Face Hub](http://hf.co/models?library=sentence-transformers&sort=downloads).
All models on the Hugging Face Hub come with the following:
1. An [automatically generated model card](https://huggingface.co/docs/hub/models-cards#what-are-model-cards) with a description, example code snippets, architecture overview, and more.
2. [Metadata tags](https://huggingface.co/docs/hub/models-cards#model-card-metadata) that help with discoverability and contain additional information such as a usage license.
3. An [interactive widget](https://huggingface.co/docs/hub/models-widgets) you can use to play with the model directly in the browser.
4. An [Inference API](https://huggingface.co/docs/hub/models-inference) that allows you to make inference requests.
<img style="height:400px;display:block;margin-left:auto;margin-right:auto;" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/libraries-sentence_transformers_widget.png"/>
## Using Hugging Face models
Any pre-trained model from the Hub can be loaded with a single line of code:
```py
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("model_name")
```
You can even click `Use in sentence-transformers` to get a code snippet that you can copy and paste!
<div style="display:flex; flex-direction:column; gap: 15px; margin-bottom: 15px;">
<img style="max-height:150px;object-fit:contain;" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/libraries-sentence_transformers_snippet1.png"/>
<img style="max-height:130px;object-fit:contain" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/libraries-sentence_transformers_snippet2.png"/>
</div>
Here is an example that loads the [multi-qa-MiniLM-L6-cos-v1 model](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1) and uses it to encode sentences, then computes the similarity between them for semantic search.
```py
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")
query_embedding = model.encode("How big is London")
passage_embedding = model.encode([
    "London has 9,787,426 inhabitants at the 2011 census",
    "London is known for its financial district",
])
print("Similarity:", util.dot_score(query_embedding, passage_embedding))
```
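As a side note on the scoring function: the `-cos-v1` models produce normalized embeddings, so dot-product and cosine similarity give essentially the same scores. Reusing the embeddings from the snippet above, a quick check might look like this (older releases name the function `util.pytorch_cos_sim` instead of `util.cos_sim`):
```py
# Cosine similarity should closely match the dot-product scores above,
# because *-cos-v1 models output (approximately) unit-length embeddings.
print("Cosine similarity:", util.cos_sim(query_embedding, passage_embedding))
```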
Here is another example, this time using the [clips/mfaq model](https://huggingface.co/clips/mfaq) for multilingual FAQ retrieval. After embedding the query and the answers, we perform a semantic search to find the most relevant answer.
```py
from sentence_transformers import SentenceTransformer, util
question = "<Q>How many models can I host on HuggingFace?"
answer_1 = "<A>All plans come with unlimited private models and datasets."
answer_2 = "<A>AutoNLP is an automatic way to train and deploy state-of-the-art NLP models, seamlessly integrated with the Hugging Face ecosystem."
answer_3 = "<A>Based on how much training data and model variants are created, we send you a compute cost and payment link - as low as $10 per job."
model = SentenceTransformer("clips/mfaq")
query_embedding = model.encode(question)
corpus_embeddings = model.encode([answer_1, answer_2, answer_3])
print(util.semantic_search(query_embedding, corpus_embeddings))
```
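`util.semantic_search` returns one list of hits per query, where each hit is a dict with a `corpus_id` (the index into the encoded corpus) and a `score`. Continuing the snippet above, the best answer could be recovered with a small sketch like this:
```py
# semantic_search returns a list of hit lists (one per query); each hit holds
# "corpus_id" (index into corpus_embeddings) and "score" (cosine similarity by default).
answers = [answer_1, answer_2, answer_3]
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=1)[0]
best = hits[0]
print(f"Best answer (score {best['score']:.3f}):", answers[best["corpus_id"]])
```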
## Sharing your models
Once you've installed the [Hub Client Library](https://huggingface.co/docs/huggingface_hub/quick-start), you can login through your terminal with your Hugging Face account.
```bash
pip install huggingface_hub
huggingface-cli login
```
Then, you can share your SentenceTransformers models by calling the [`push_to_hub` method](https://www.sbert.net/docs/package_reference/SentenceTransformer.html#sentence_transformers.SentenceTransformer.push_to_hub) on a trained model. By default, the model will be uploaded to your account, but you can upload to an [organization](https://huggingface.co/docs/hub/organizations) by providing the organization as part of the `repo_id`, e.g. `model.push_to_hub("my_organization/my_model_name")`. `push_to_hub` automatically generates a model card, an inference widget, example code snippets, and more.
```py
from sentence_transformers import SentenceTransformer

# Load (or train) a model first; any SentenceTransformer works here.
# "all-MiniLM-L6-v2" is just an example of an existing pre-trained model.
model = SentenceTransformer("all-MiniLM-L6-v2")

model.push_to_hub("my_new_model")
```
You can automatically add a list of the datasets used to train the model to the Hub's model card with the argument `train_datasets: Optional[List[str]] = None`. See the "Datasets used to train" section in the [ITESM/sentece-embeddings-BETO](https://huggingface.co/ITESM/sentece-embeddings-BETO) model for an example of the final result.
```py
model.push_to_hub("my_new_model", train_datasets=["GEM/wiki_lingua", "code_search_net"])
```
## Sharing your embeddings
The Hugging Face Hub can also be used to store and share any embeddings you generate. You can export your embeddings to CSV, ZIP, Pickle, or any other format, and then upload them to the Hub as a [Dataset](https://huggingface.co/docs/hub/datasets-adding). Read the ["Getting Started With Embeddings" blog post](https://huggingface.co/blog/getting-started-with-embeddings#2-host-embeddings-for-free-on-the-hugging-face-hub) for more information.
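As a rough illustration of that workflow (a sketch only, assuming a hypothetical dataset repository `my_user/my_embeddings` and using `pandas` plus the `huggingface_hub` client for the upload):
```py
import pandas as pd
from huggingface_hub import HfApi
from sentence_transformers import SentenceTransformer

sentences = ["This framework generates embeddings", "Sentences are passed as a list"]
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(sentences)

# Store the sentences together with their embeddings as CSV
pd.DataFrame(embeddings, index=sentences).to_csv("embeddings.csv")

# Upload the file to a dataset repository on the Hub
# ("my_user/my_embeddings" is a placeholder repo id)
api = HfApi()
api.create_repo(repo_id="my_user/my_embeddings", repo_type="dataset", exist_ok=True)
api.upload_file(
    path_or_fileobj="embeddings.csv",
    path_in_repo="embeddings.csv",
    repo_id="my_user/my_embeddings",
    repo_type="dataset",
)
```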
## Additional resources
* [Hugging Face Hub docs](https://huggingface.co/docs/hub/index)
* Integration with Hub [announcement](https://huggingface.co/blog/sentence-transformers-in-the-hub).