bert-large infer

fd5309bc · yangzhong · fd5309bc · fd5309bc · fd5309bc · fd5309bc
Commit fd5309bc authored Oct 23, 2025 by yangzhong
20 changed files
--- a/AGENTS.md
+++ b/AGENTS.md
+# AGENTS.md Guide for Hugging Face Transformers
+This AGENTS.md file provides guidance for code agents working with this codebase.
+## Core Project Structure
+- `/src/transformers`: This contains the core source code for the library
+  - `/models`: Code for individual models. Models inherit from base classes in the root `/src/transformers` directory.
+- `/tests`: This contains the core test classes for the library. These are usually inherited rather than directly run.
+  - `/models`: Tests for individual models. Model tests inherit from common tests in the root `/tests` directory.
+- `/docs`: This contains the documentation for the library, including guides, tutorials, and API references.
+## Coding Conventions for Hugging Face Transformers
+- PRs should be as brief as possible. Bugfix PRs in particular can often be only one or two lines long, and do not need large comments, docstrings or new functions in this case. Aim to minimize the size of the diff.
+- When writing tests, they should be added to an existing file. The only exception is for PRs to add a new model, when a new test directory should be created for that model.
+- Code style is enforced in the CI. You can install the style tools with `pip install -e .[quality]`. You can then run `make fixup` to apply style and consistency fixes to your code.
+## Copying and inheritance
+Many models in the codebase have similar code, but it is not shared by inheritance because we want each model file to be self-contained.
+We use two mechanisms to keep this code in sync:
+- "Copied from" syntax. Functions or entire classes can have a comment at the top like this: `# Copied from transformers.models.llama.modeling_llama.rotate_half` or `# Copied from transformers.models.t5.modeling_t5.T5LayerNorm with T5->MT5`
+  These comments are actively checked by the style tools, and copies will automatically be updated when the base code is updated. If you need to update a copied function, you should
+  either update the base function and use `make fixup` to propagate the change to all copies, or simply remove the `# Copied from` comment if that is inappropriate.
+- "Modular" files. These files briefly define models by composing them using inheritance from other models. They are not meant to be used directly. Instead, the style tools
+  automatically generate a complete modeling file, like `modeling_bert.py`, from the modular file like `modular_bert.py`. If a model has a modular file, the modeling file
+  should never be edited directly! Instead, changes should be made in the modular file, and then you should run `make fixup` to update the modeling file automatically.
+When adding new models, you should prefer `modular` style.
+## Testing
+After making changes, you should usually run `make fixup` to ensure any copies and modular files are updated, and then test all affected models. This includes both
+the model you made the changes in and any other models that were updated by `make fixup`. Tests can be run with `pytest tests/models/[name]/test_modeling_[name].py`.
+If your changes affect code in other classes like tokenizers or processors, you should run those tests instead, like `test_processing_[name].py` or `test_tokenization_[name].py`.
+In order to run tests, you may need to install dependencies. You can do this with `pip install -e .[testing]`. You will probably also need to `pip install torch accelerate` if your environment does not already have them.
\ No newline at end of file
--- a/CITATION.cff
+++ b/CITATION.cff
+cff-version: "1.2.0"
+date-released: 2020-10
+message: "If you use this software, please cite it using these metadata."
+title: "Transformers: State-of-the-Art Natural Language Processing"
+url: "https://github.com/huggingface/transformers"
+authors: 
+  - family-names: Wolf
+    given-names: Thomas
+  - family-names: Debut
+    given-names: Lysandre
+  - family-names: Sanh
+    given-names: Victor
+  - family-names: Chaumond
+    given-names: Julien
+  - family-names: Delangue
+    given-names: Clement
+  - family-names: Moi
+    given-names: Anthony
+  - family-names: Cistac
+    given-names: Pierric
+  - family-names: Ma
+    given-names: Clara
+  - family-names: Jernite
+    given-names: Yacine
+  - family-names: Plu
+    given-names: Julien
+  - family-names: Xu
+    given-names: Canwen
+  - family-names: "Le Scao"
+    given-names: Teven
+  - family-names: Gugger
+    given-names: Sylvain
+  - family-names: Drame
+    given-names: Mariama
+  - family-names: Lhoest
+    given-names: Quentin
+  - family-names: Rush
+    given-names: "Alexander M."
+preferred-citation:
+  type: conference-paper
+  authors:
+  - family-names: Wolf
+    given-names: Thomas
+  - family-names: Debut
+    given-names: Lysandre
+  - family-names: Sanh
+    given-names: Victor
+  - family-names: Chaumond
+    given-names: Julien
+  - family-names: Delangue
+    given-names: Clement
+  - family-names: Moi
+    given-names: Anthony
+  - family-names: Cistac
+    given-names: Pierric
+  - family-names: Ma
+    given-names: Clara
+  - family-names: Jernite
+    given-names: Yacine
+  - family-names: Plu
+    given-names: Julien
+  - family-names: Xu
+    given-names: Canwen
+  - family-names: "Le Scao"
+    given-names: Teven
+  - family-names: Gugger
+    given-names: Sylvain
+  - family-names: Drame
+    given-names: Mariama
+  - family-names: Lhoest
+    given-names: Quentin
+  - family-names: Rush
+    given-names: "Alexander M."
+  booktitle: "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations"
+  month: 10
+  start: 38
+  end: 45
+  title: "Transformers: State-of-the-Art Natural Language Processing"
+  year: 2020
+  publisher: "Association for Computational Linguistics"
+  url: "https://www.aclweb.org/anthology/2020.emnlp-demos.6"
+  address: "Online"
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
+# Contributor Covenant Code of Conduct
+## Our Pledge
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+## Our Standards
+Examples of behavior that contributes to a positive environment for our
+community include:
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the overall
+  community
+Examples of unacceptable behavior include:
+* The use of sexualized language or imagery, and sexual attention or advances of
+  any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+## Enforcement Responsibilities
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+## Scope
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+## Enforcement
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+feedback@huggingface.co.
+All complaints will be reviewed and investigated promptly and fairly.
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+## Enforcement Guidelines
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+### 1. Correction
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+### 2. Warning
+**Community Impact**: A violation through a single incident or series of
+actions.
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+### 3. Temporary Ban
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+### 4. Permanent Ban
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+## Attribution
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[Mozilla CoC]: https://github.com/mozilla/diversity
+[FAQ]: https://www.contributor-covenant.org/faq
+[translations]: https://www.contributor-covenant.org/translations
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
+<!---
+Copyright 2020 The HuggingFace Team. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+# Contribute to 🤗 Transformers
+Everyone is welcome to contribute, and we value everybody's contribution. Code
+contributions are not the only way to help the community. Answering questions, helping
+others, and improving the documentation are also immensely valuable.
+It also helps us if you spread the word! Reference the library in blog posts
+about the awesome projects it made possible, shout out on Twitter every time it has
+helped you, or simply ⭐️ the repository to say thank you.
+However you choose to contribute, please be mindful and respect our
+[code of conduct](https://github.com/huggingface/transformers/blob/main/CODE_OF_CONDUCT.md).
+**This guide was heavily inspired by the awesome [scikit-learn guide to contributing](https://github.com/scikit-learn/scikit-learn/blob/main/CONTRIBUTING.md).**
+## Ways to contribute
+There are several ways you can contribute to 🤗 Transformers:
+* Fix outstanding issues with the existing code.
+* Submit issues related to bugs or desired new features.
+* Implement new models.
+* Contribute to the examples or to the documentation.
+If you don't know where to start, there is a special [Good First
+Issue](https://github.com/huggingface/transformers/contribute) listing. It will give you a list of
+open issues that are beginner-friendly and help you start contributing to open-source. The best way to do that is to open a Pull Request and link it to the issue that you'd like to work on. We try to give priority to opened PRs as we can easily track the progress of the fix, and if the contributor does not have time anymore, someone else can take the PR over.
+For something slightly more challenging, you can also take a look at the [Good Second Issue](https://github.com/huggingface/transformers/labels/Good%20Second%20Issue) list. In general though, if you feel like you know what you're doing, go for it and we'll help you get there! 🚀
+> All contributions are equally valuable to the community. 🥰
+## Fixing outstanding issues
+If you notice an issue with the existing code and have a fix in mind, feel free to [start contributing](#create-a-pull-request) and open a Pull Request!
+## Submitting a bug-related issue or feature request
+Do your best to follow these guidelines when submitting a bug-related issue or a feature
+request. It will make it easier for us to come back to you quickly and with good
+feedback.
+### Did you find a bug?
+The 🤗 Transformers library is robust and reliable thanks to users who report the problems they encounter.
+Before you report an issue, we would really appreciate it if you could **make sure the bug was not
+already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) or on our [discord](https://discord.com/invite/hugging-face-879548962464493619) first. This helps us respond quicker to fixing issues related to the library versus general questions.
+> [!TIP]
+> We have a [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat), and we highly encourage you to ask all your questions there. There is always a chance your bug can be fixed with a simple flag 👾🔫
+Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it:
+* Your **OS type and version**, and your **Python** and **PyTorch** versions when applicable.
+* A short, self-contained, code snippet that allows us to reproduce the bug in
+  less than 30s.
+* The *full* traceback if an exception is raised.
+* Attach any other additional information, like screenshots, you think may help.
+To get the OS and software versions automatically, run the following command:
+```bash
+transformers env
+```
+You can also run the same command from the root of the repository:
+```bash
+python src/transformers/commands/transformers_cli.py env
+```
+### Do you want a new feature?
+If there is a new feature you'd like to see in 🤗 Transformers, please open an issue and describe:
+1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it a feature related to something you need for a project? Is it something you worked on and think it could benefit the community?
+   Whatever it is, we'd love to hear about it!
+2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better we'll be able to help you.
+3. Provide a *code snippet* that demonstrates the feature's usage.
+4. If the feature is related to a paper, please include a link.
+If your issue is well written we're already 80% of the way there by the time you create it.
+We have added [templates](https://github.com/huggingface/transformers/tree/main/templates) to help you get started with your issue.
+## Do you want to implement a new model?
+New models are constantly released and if you want to implement a new model, please provide the following information:
+* A short description of the model and a link to the paper.
+* Link to the implementation if it is open-sourced.
+* Link to the model weights if they are available.
+If you are willing to contribute the model yourself, let us know so we can help you add it to 🤗 Transformers!
+We have a technical guide for [how to add a model to 🤗 Transformers](https://huggingface.co/docs/transformers/modular_transformers).
+### Vision-Language Model Contribution Checklist
+If you're contributing a **vision-language model** (or any multimodal model that processes images/videos), please follow this checklist. Maintainers will use this to review your PR, and completing these steps will significantly increase the likelihood of your PR being merged quickly.
+**Required checklist for all vision-language model contributions:**
+☐ **1. Implement a modular file**
+All new models should use the modular architecture pattern. Create a `modular_<model_name>.py` file using the modular model converter:
+- Use the CLI, [`transformers add-new-model-like`](https://github.com/huggingface/transformers/blob/main/src/transformers/cli/add_new_model_like.py) to generate a modular skeleton and get started
+- All code should be in the modular file if possible. Modeling must be in it; ideally, configuration is in it as well.
+- Reuse existing patterns from similar models as much as possible
+To verify your modular file is correct, run:
+```bash
+python utils/modular_model_converter.py <model_name>
+```
+This will generate the separate files (`modeling_*.py`, `configuration_*.py`, etc.) from your modular file. The CI will enforce that these generated files match your modular file.
+☐ **2. Add a fast image processor (for image models)**
+If your model processes images, implement a fast image processor that uses `torch` and `torchvision` instead of PIL/numpy for better inference performance:
+- See the detailed guide in [#36978](https://github.com/huggingface/transformers/issues/36978)
+- Fast processors inherit from `BaseImageProcessorFast`
+- Examples: `LlavaOnevisionImageProcessorFast`, `Idefics2ImageProcessorFast`
+☐ **3. Create a weight conversion script**
+Add a `convert_<model_name>_to_hf.py` script that converts the original model weights to the HuggingFace format:
+- Script should handle checkpoint loading, key mapping, and saving in HF format
+- Include usage examples and documentation in the script
+- Examples: [`convert_llava_onevision_weights_to_hf.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/llava_onevision/convert_llava_onevision_weights_to_hf.py), [`convert_idefics2_weights_to_hf.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/idefics2/convert_idefics2_weights_to_hf.py)
+☐ **4. Add integration tests with exact output matching**
+At minimum, add an `IntegrationTest` class that tests end-to-end generation (processing and modelling) with **exact** output matching:
+- For generative models: test that generated text matches expected output exactly
+- For non-generative models: test that output logits match expected values
+- Tests should use real checkpoints (load in 4-bit or half precision if the checkpoint is too big to fit in our CI runners) and real inputs
+- Example pattern:
+```python
+class MyModelIntegrationTest(unittest.TestCase):
+    @slow
+    def test_model_integration(self):
+        model = MyModelForConditionalGeneration.from_pretrained("org/model-name")
+        processor = AutoProcessor.from_pretrained("org/model-name")
+        inputs = processor(images=image, text=prompt, return_tensors="pt")
+        output = model.generate(**inputs, max_new_tokens=20)
+        EXPECTED_TEXT = "exact expected output"
+        self.assertEqual(processor.decode(output[0]), EXPECTED_TEXT)
+```
+See `tests/models/llava_onevision/test_modeling_llava_onevision.py` for complete examples.
+☐ **5. Update documentation**
+Add or update model documentation:
+- Create `docs/source/en/model_doc/<model_name>.md` with usage examples, if the CLI hasn't already created it
+- Include model description, paper link, and basic usage with `Pipeline` and `AutoModel`
+- Add the model to the appropriate TOC files
+☐ **6. Look for reusable patterns**
+The library has 400+ models with many established patterns:
+- Search for similar models (e.g., other vision-language models)
+- Reuse attention mechanisms, layer implementations, and processing patterns
+- Check models like LLaVA, Idefics2, Fuyu for vision-language patterns
+- Use provided decorators (like `auto_docstring`, `can_return_tuple`, `check_model_inputs` and `_can_record_outputs`) where relevant.
+- Don't reinvent the wheel
+☐ **7. Run quality checks and read the output**
+Before submitting your PR, install quality dependencies and run the full check suite:
+```bash
+pip install -e ".[quality]"
+make fixup
+```
+**Important**: Take time to read the output of `make fixup`. It will:
+- Lint and format your code automatically
+- Run consistency checks (imports, docstrings, etc.)
+- Show any remaining issues that need manual fixes
+All checks must pass before your PR can be merged.
+**If this checklist is complete, your PR has a very high likelihood of being merged!** Following these steps makes the maintainers' work much easier and will reduce the number of review iterations, getting your important work out there faster.
+#### Copy-pastable checklist for maintainers
+Here's a condensed version maintainers can copy into PRs:
+```markdown
+## Multimodal Model Addition Checklist
+Please ensure your PR completes all following items. See the [full checklist](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#vision-language-model-contribution-checklist) for details.
+- [ ] **Modular file**: `modular_<model_name>.py` implemented and verified with `python utils/modular_model_converter.py <model_name>`
+- [ ] **Fast image processor**: Implemented using `BaseImageProcessorFast` (see [#36978](https://github.com/huggingface/transformers/issues/36978))
+- [ ] **Conversion script**: `convert_<model_name>_to_hf.py` added with usage examples
+- [ ] **Integration tests**: End-to-end tests with exact output matching (text or logits)
+- [ ] **Documentation**: Model docs added/updated in `docs/source/en/model_doc/`
+- [ ] **Pattern reuse**: Verified against similar models (LLaVA, Idefics2, etc.)
+- [ ] **Quality checks**: `make fixup` passes with no errors
+```
+## Do you want to add documentation?
+We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be happy to make the changes or help you make a contribution if you're interested!
+For more details about how to generate, build, and write the documentation, take a look at the documentation [README](https://github.com/huggingface/transformers/tree/main/docs).
+## Create a Pull Request
+Before writing any code, we strongly advise you to search through the existing PRs or
+issues to make sure nobody is already working on the same thing. If you are
+unsure, it is always a good idea to open an issue to get some feedback.
+You will need basic `git` proficiency to contribute to
+🤗 Transformers. While `git` is not the easiest tool to use, it has the greatest
+manual. Type `git --help` in a shell and enjoy! If you prefer books, [Pro
+Git](https://git-scm.com/book/en/v2) is a very good reference.
+You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main/setup.py#L449)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:
+1. Fork the [repository](https://github.com/huggingface/transformers) by
+   clicking on the **[Fork](https://github.com/huggingface/transformers/fork)** button on the repository's page. This creates a copy of the code
+   under your GitHub user account.
+2. Clone your fork to your local disk, and add the base repository as a remote:
+   ```bash
+   git clone git@github.com:<your Github handle>/transformers.git
+   cd transformers
+   git remote add upstream https://github.com/huggingface/transformers.git
+   ```
+3. Create a new branch to hold your development changes:
+   ```bash
+   git checkout -b a-descriptive-name-for-my-changes
+   ```
+   🚨 **Do not** work on the `main` branch!
+4. Set up a development environment by running the following command in a virtual environment:
+   ```bash
+   pip install -e ".[dev]"
+   ```
+   If 🤗 Transformers was already installed in the virtual environment, remove
+   it with `pip uninstall transformers` before reinstalling it in editable
+   mode with the `-e` flag.
+   Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
+   failure with this command. If that's the case, make sure to install PyTorch and then run:
+   ```bash
+   pip install -e ".[quality]"
+   ```
+   which should be enough for most use cases.
+5. Develop the features in your branch.
+   As you work on your code, you should make sure the test suite
+   passes. Run the tests impacted by your changes like this:
+   ```bash
+   pytest tests/<TEST_TO_RUN>.py
+   ```
+   For more information about tests, check out the
+   [Testing](https://huggingface.co/docs/transformers/testing) guide.
+   🤗 Transformers relies on `black` and `ruff` to format its source code
+   consistently. After you make changes, apply automatic style corrections and code verifications
+   that can't be automated in one go with:
+   ```bash
+   make fixup
+   ```
+   This target is also optimized to only work with files modified by the PR you're working on.
+   If you prefer to run the checks one after the other, the following command applies the
+   style corrections:
+   ```bash
+   make style
+   ```
+   🤗 Transformers also uses `ruff` and a few custom scripts to check for coding mistakes. Quality
+   controls are run by the CI, but you can run the same checks with:
+   ```bash
+   make quality
+   ```
+   Finally, we have a lot of scripts to make sure we don't forget to update
+   some files when adding a new model. You can run these scripts with:
+   ```bash
+   make repo-consistency
+   ```
+   To learn more about those checks and how to fix any issues with them, check out the
+   [Checks on a Pull Request](https://huggingface.co/docs/transformers/pr_checks) guide.
+   If you're modifying documents under the `docs/source` directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. To run a local check
+   make sure you install the [documentation builder](https://github.com/huggingface/doc-builder).
+   ```bash
+   pip install hf-doc-builder
+   ```
+   Run the following command from the root of the repository:
+   ```bash
+   doc-builder build transformers docs/source/en --build_dir ~/tmp/test-build
+   ```
+   This will build the documentation in the `~/tmp/test-build` folder where you can inspect the generated
+   Markdown files with your favorite editor. You can also preview the docs on GitHub when you open a pull request.
+   Once you're happy with your changes, add the changed files with `git add` and
+   record your changes locally with `git commit`:
+   ```bash
+   git add modified_file.py
+   git commit
+   ```
+   Please remember to write [good commit
+   messages](https://chris.beams.io/posts/git-commit/) to clearly communicate the changes you made!
+   To keep your copy of the code up to date with the original
+   repository, rebase your branch on `upstream/main` *before* you open a pull request or if requested by a maintainer:
+   ```bash
+   git fetch upstream
+   git rebase upstream/main
+   ```
+   Push your changes to your branch:
+   ```bash
+   git push -u origin a-descriptive-name-for-my-changes
+   ```
+   If you've already opened a pull request, you'll need to force push with the `--force` flag. Otherwise, if the pull request hasn't been opened yet, you can just push your changes normally.
+6. Now you can go to your fork of the repository on GitHub and click on **Pull Request** to open a pull request. Make sure you tick off all the boxes on our [checklist](#pull-request-checklist) below. When you're ready, you can send your changes to the project maintainers for review.
+7. It's ok if maintainers request changes, it happens to our core contributors
+   too! So everyone can see the changes in the pull request, work in your local
+   branch and push the changes to your fork. They will automatically appear in
+   the pull request.
+### Pull request checklist
+☐ The pull request title should summarize your contribution.<br>
+☐ If your pull request addresses an issue, please mention the issue number in the pull
+request description to make sure they are linked (and people viewing the issue know you
+are working on it).<br>
+☐ To indicate a work in progress please prefix the title with `[WIP]`. These are
+useful to avoid duplicated work, and to differentiate it from PRs ready to be merged.<br>
+☐ Make sure existing tests pass.<br>
+☐ If adding a new feature, also add tests for it.<br>
+- If you are adding a new model, make sure you use
+     `ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,...)` to trigger the common tests.
+- If you are adding new `@slow` tests, make sure they pass using
+     `RUN_SLOW=1 python -m pytest tests/models/my_new_model/test_my_new_model.py`.
+- If you are adding a new tokenizer, write tests and make sure
+     `RUN_SLOW=1 python -m pytest tests/models/{your_model_name}/test_tokenization_{your_model_name}.py` passes.
+- CircleCI does not run the slow tests, but GitHub Actions does every night!<br>
+☐ All public methods must have informative docstrings (see
+[`modeling_bert.py`](https://github.com/huggingface/transformers/blob/main/src/transformers/models/bert/modeling_bert.py)
+for an example).<br>
+☐ Due to the rapidly growing repository, don't add any images, videos and other
+non-text files that'll significantly weigh down the repository. Instead, use a Hub
+repository such as [`hf-internal-testing`](https://huggingface.co/hf-internal-testing)
+to host these files and reference them by URL. We recommend placing documentation
+related images in the following repository:
+[huggingface/documentation-images](https://huggingface.co/datasets/huggingface/documentation-images).
+You can open a PR on this dataset repository and ask a Hugging Face member to merge it.
+For more information about the checks run on a pull request, take a look at our [Checks on a Pull Request](https://huggingface.co/docs/transformers/pr_checks) guide.
+### Tests
+An extensive test suite is included to test the library behavior and several examples. Library tests can be found in
+the [tests](https://github.com/huggingface/transformers/tree/main/tests) folder and examples tests in the
+[examples](https://github.com/huggingface/transformers/tree/main/examples) folder.
+We like `pytest` and `pytest-xdist` because it's faster. From the root of the
+repository, specify a *path to a subfolder or a test file* to run the test:
+```bash
+python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_new_model
+```
+Similarly, for the `examples` directory, specify a *path to a subfolder or test file* to run the test. For example, the following command tests the text classification subfolder in the PyTorch `examples` directory:
+```bash
+pip install -r examples/xxx/requirements.txt  # only needed the first time
+python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification
+```
+In fact, this is actually how our `make test` and `make test-examples` commands are implemented (not including the `pip install`)!
+You can also specify a smaller set of tests in order to test only the feature
+you're working on.
+By default, slow tests are skipped but you can set the `RUN_SLOW` environment variable to
+`yes` to run them. This will download many gigabytes of models so make sure you
+have enough disk space, a good internet connection or a lot of patience!
+<Tip warning={true}>
+Remember to specify a *path to a subfolder or a test file* to run the test. Otherwise, you'll run all the tests in the `tests` or `examples` folder, which will take a very long time!
+</Tip>
+```bash
+RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_new_model
+RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification
+```
+Like the slow tests, there are other environment variables available which are not enabled by default during testing:
+- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
+More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).
+🤗 Transformers uses `pytest` as a test runner only. It doesn't use any
+`pytest`-specific features in the test suite itself.
+This means `unittest` is fully supported. Here's how to run tests with
+`unittest`:
+```bash
+python -m unittest discover -s tests -t . -v
+python -m unittest discover -s examples -t examples -v
+```
+### Style guide
+For documentation strings, 🤗 Transformers follows the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html).
+Check our [documentation writing guide](https://github.com/huggingface/transformers/tree/main/docs#writing-documentation---specification)
+for more information.
+### Develop on Windows
+On Windows (unless you're working in [Windows Subsystem for Linux](https://learn.microsoft.com/en-us/windows/wsl/) or WSL), you need to configure git to transform Windows `CRLF` line endings to Linux `LF` line endings:
+```bash
+git config core.autocrlf input
+```
+One way to run the `make` command on Windows is with MSYS2:
+1. [Download MSYS2](https://www.msys2.org/), and we assume it's installed in `C:\msys64`.
+2. Open the command line `C:\msys64\msys2.exe` (it should be available from the **Start** menu).
+3. Run in the shell: `pacman -Syu` and install `make` with `pacman -S make`.
+4. Add `C:\msys64\usr\bin` to your PATH environment variable.
+You can now use `make` from any terminal (PowerShell, cmd.exe, etc.)! 🎉
+### Sync a forked repository with upstream main (the Hugging Face repository)
+When updating the main branch of a forked repository, please follow these steps to avoid pinging the upstream repository which adds reference notes to each upstream PR, and sends unnecessary notifications to the developers involved in these PRs.
+1. When possible, avoid syncing with the upstream using a branch and PR on the forked repository. Instead, merge directly into the forked main.
+2. If a PR is absolutely necessary, use the following steps after checking out your branch:
+   ```bash
+   git checkout -b your-branch-for-syncing
+   git pull --squash --no-commit upstream main
+   git commit -m '<your message without GitHub references>'
+   git push --set-upstream origin your-branch-for-syncing
+   ```
--- a/ISSUES.md
+++ b/ISSUES.md
+<!---
+Copyright 2020 The HuggingFace Team. All rights reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+# How To Request Support
+This is an Open Source Project so please be mindful that like in any other project of this kind there is no obligation to answer all requests for help.
+However, we want to encourage you to ask for help whenever you think it's needed! We are happy about every question we get because it allows us to better understand your needs, possible misunderstandings, and most importantly a way for you to help us make this library better. That being said, this document's main purpose is to provide guidelines on how you can formulate your requests to increase your chances to be understood and to get support.
+There are two main venues to receive support: [the forums](https://discuss.huggingface.co/) and [the GitHub issues](https://github.com/huggingface/transformers/issues).
+## The Forums
+[The user forums](https://discuss.huggingface.co/) are supported by the wide community of the library users and backed up by developers when needed.
+If you have a difficulty with deploying this library or some questions, or you'd like to discuss a new feature, please first consider discussing those things at the forums. Only when you feel your subject matter has been crystallized and you still need support from the library developers do proceed to file an [issue](https://github.com/huggingface/transformers/issues).
+In particular all "Please explain" questions or objectively very user-specific feature requests belong to the forums. Here are some examples of such questions:
+* "I would like to use a BertModel within a RL-Agent for a customer support service. How can I use a BertForMaskedLM in my ChatBotModel?"
+* "Could you please explain why T5 has no positional embedding matrix under T5Model?"
+* "How should I set my generation parameters for translation?"
+* "How to train T5 on De->En translation?"
+## The GitHub Issues
+Everything which hints at a bug should be opened as an [issue](https://github.com/huggingface/transformers/issues).
+You are not required to read the following guidelines before opening an issue. However, if you notice that your issue doesn't get any replies, chances are that the developers have one or several difficulties with its quality. In this case, reading the following points and adjusting your issue accordingly could help.
+1. Before posting an issue, first search for already posted issues, since chances are someone has already asked a similar question before you.
+    If you use Google your search query should be:
+    ```
+    "huggingface" "transformers" your query
+    ```
+    The first two quoted words tell Google to limit the search to the context of the Huggingface Transformers. The remainder is your query - most commonly this would be the error message the software fails with. We will go deeper into details shortly.
+    The results of such a query will typically match GitHub issues, Hugging Face forums, StackExchange, and blogs.
+    If you find relevant hints, you may choose to continue the discussion there if you have follow up questions.
+    If what you found is similar but doesn't quite answer your problem, please, post a new issue and do include links to similar issues or forum discussions you may have found.
+    Let's look at some examples:
+    The error message, often referred to as an assertion, tells us what went wrong. Here is an example of an assertion:
+   ```python
+   Traceback (most recent call last):
+     File "<string>", line 1, in <module>
+     File "/transformers/src/transformers/__init__.py", line 34, in <module>
+       from . import dependency_versions_check
+     File "/transformers/src/transformers/dependency_versions_check.py", line 34, in <module>
+       from .utils import is_tokenizers_available
+     File "/transformers/src/transformers/utils/import_utils.py", line 40, in <module>
+       from tqdm.auto import tqdm
+   ModuleNotFoundError: No module named 'tqdm.auto'
+   ```
+   and it typically includes a traceback, so that we can see the full stack of calls the program made before it fails. This gives us the context to know why the program failed.
+   Going back to the above example: if you received this error, look at the very last line of the error, which is:
+   ```python
+   ModuleNotFoundError: No module named 'tqdm.auto'
+   ```
+    And now we can use it to do the searching on your favorite search engine:
+    1. first for `"huggingface" "transformers" "ModuleNotFoundError: No module named 'tqdm.auto'"`
+    2. if you don't find relevant results, then search for just `"ModuleNotFoundError: No module named 'tqdm.auto'"`
+    3. and finally if nothing still comes up, then remove the outside quotes: `ModuleNotFoundError: No module named 'tqdm.auto'`
+   If the error includes any messages that include bits unique to your filesystem, always remove those in the search query since other users will not have the same filesystem as yours. For example:
+   ```bash
+   python -c 'open("/tmp/wrong_path.txt", "r")'
+   Traceback (most recent call last):
+     File "<string>", line 1, in <module>
+   FileNotFoundError: [Errno 2] No such file or directory: '/tmp/wrong_path.txt'
+   ```
+   Here you'd search for just: `"FileNotFoundError: [Errno 2] No such file or directory"`
+   If the local information that you removed was inside the error message, you may need to remove the double quotes since your query is no longer exact. So if the error message was something like:
+   ```bash
+      ValueError: '/tmp/wrong_path.txt' cannot be found
+   ```
+   then you'd search for `"ValueError" "cannot be found"`
+   As you search you will notice that when you don't use quotes often the search engines will return a variety of unrelated hits, which may or may not be what you want.
+   Experiment with different ways and find which approach gives the most satisfactory results.
+2. Keep the issue short, providing the information that you think will aid the developers to understand your situation. Put yourself in the shoes of the person who has never seen your code or knows anything about your custom setup. This mental exercise will help to develop an intuition for what/what not to share.
+3. If there is a software failure, always provide the full traceback, for example:
+   ```python
+   $ python -c 'import transformers'
+   Traceback (most recent call last):
+     File "<string>", line 1, in <module>
+     File "/transformers/src/transformers/__init__.py", line 34, in <module>
+       from . import dependency_versions_check
+     File "/transformers/src/transformers/dependency_versions_check.py", line 34, in <module>
+       from .utils import is_tokenizers_available
+     File "/transformers/src/transformers/utils/import_utils.py", line 40, in <module>
+       from tqdm.auto import tqdm
+   ModuleNotFoundError: No module named 'tqdm.auto'
+   ```
+   As compared to providing just the last line of the error message, e.g.:
+   ```python
+   ModuleNotFoundError: No module named 'tqdm.auto'
+   ```
+   which is not sufficient.
+   If your application is running on more than one GPU (e.g. under `DistributedDataParallel`) and typically getting every log and traceback printed multiple times, please make sure that you paste only one copy of it. At times the traceback from parallel processes may get interleaved - so either disentangle these or change the loggers to log only for `local_rank==0` so that only one process logs things.
+4. When quoting a traceback, command line instructions and any type of code always enclose it in triple backticks inside the editor window, that is:
+   ````
+   ```
+   git clone https://github.com/huggingface/transformers
+   cd transformers
+   pip install .
+   ```
+   ````
+   If it's a command line with a long argument list, please consider breaking it down using backslashes and new lines. Here is an example of a good command line quote:
+   ```bash
+    cd examples/seq2seq
+    torchrun --nproc_per_node=2 ./finetune_trainer.py \
+    --model_name_or_path sshleifer/distill-mbart-en-ro-12-4 --data_dir wmt_en_ro \
+    --output_dir output_dir \
+    --do_train --n_train 500 --num_train_epochs 1 \
+    --per_device_train_batch_size 1  --freeze_embeds \
+    --src_lang en_XX --tgt_lang ro_RO --task translation \
+    --fp16
+   ```
+   If you don't break it up, one has to scroll horizontally which often makes it quite difficult to quickly see what's happening.
+   The backslashes allow us to copy the command directly into the console to run it, without needing to edit it.
+5. Include only the important information that you think will help the developer to quickly identify the problem.
+   For example applications often create huge amounts of logs. Ask yourself whether providing all or parts of the log is useful.
+   Pasting 100-1000 lines of log into the issue is an immediate turn off, since it will take a lot of time to figure out where the pertinent parts of the log are.
+   Attaching a full log can be helpful if it's done as an attachment, or if it's enclosed in the following HTML code in the comment editor window:
+   ```
+   <details>
+   <summary>Full log</summary>
+   <pre>
+   many
+   lines
+   go
+   here
+   </pre>
+   </details>
+   ```
+   which would result in the following entry, which can be opened if desired, but otherwise takes little space.
+   <details>
+   <summary>Full log</summary>
+   <pre>
+   many
+   lines
+   go
+   here
+   </pre>
+   </details>
+    You could also provide a link to a pastebin service, but this is less beneficial since those links tend to expire quickly and future readers of your issue might not be able to access that log file anymore and may lack some context.
+6. If this is an issue in your code, do try to reduce that code to a minimal example that still demonstrates the problem. Please ask at the forums if you have a hard time figuring how to do that. Please realize that we don't have the luxury of having time to try and understand all of your custom code.
+   If you really tried to make a short reproducible code but couldn't figure it out, it might be that having a traceback will give the developer enough information to know what's going on. But if it is not enough and we can't reproduce the problem, we can't really solve it.
+   Do not despair if you can't figure it out from the beginning, just share what you can and perhaps someone else will be able to help you at the forums.
+   If your setup involves any custom datasets, the best way to help us reproduce the problem is to create a [Google Colab notebook](https://colab.research.google.com/) that demonstrates the issue and once you verify that the issue still exists, include a link to that notebook in the Issue. Just make sure that you don't copy and paste the location bar url of the open notebook - as this is private and we won't be able to open it. Instead, you need to click on `Share` in the right upper corner of the notebook, select `Get Link` and then copy and paste the public link it will give to you.
+7. If you forked off some of this project's code or example applications, please, do not ask us to go into your code repository and figure out what you may have done. The code is already very complex and unless there is an easy way to do a diff and it's a small diff, it won't be possible to find someone with time on their hands to make a lengthy investigation. Albeit, you might find someone at the forums who will be generous to do this for you.
+8. Before reporting an issue, first, always try to update your environment to the latest official version of this library. We have no resources to go and debug older revisions, which could easily have bugs that have been fixed in the latest released version.
+   We understand that this is not always possible, especially when APIs change, in which case file an issue against the highest library version your environment can support.
+   Of course, if you upgrade the library, always retest that the problem is still there.
+9. Please do not ask us to reproduce an issue with your custom data, since we don't have it. So, either you should use some existing dataset supported by HF datasets or you need to supply code that generates a small sample on the fly, or some other quick and simple way to get it.
+   Please do not send us any non-public domain data that may require a license or a permission to be used.
+10. Do not tag multiple developers on the issue unless you know this is expected, either because you asked them and they gave you an explicit permission to tag them or the issue template instructs you to do so.
+   The "who to tag for what domain" part of the issue template is there to help users direct their questions to the right developers who are designated maintainers of project's specific domains. They can then decide at their own discretion to tag other developers if they feel it'd help move the issue forward.
+   We currently don't have a triage service and we trust your capacity to identify the right domain and thus the persons to tag in your issue. If you are not sure, please use the forums to ask for guidance.
+   When in doubt, err on the side of not tagging a given person. If you tag multiple people out of context or without permission, don't be surprised if you get no response at all. Please remember that every time you tag someone, they get a notification and you're taking their time without their permission. Please be sensitive to that.
+   If you got helped by one of the developers in the past please don't tag them in future issues, unless they are listed in the issue template for the domain you are asking about or that developer gave you an explicit permission to tag them in future issues.
+   If you see a certain developer doing multiple and/or recent commits into a specific area of the project that you feel is relevant to your issue, it is not a good reason to tag them. Various developers may be fixing things that prevent them from moving forward, but often their work is focused on a totally different domain. And while they may or may not know how to help you with the problem at hand, it would benefit the whole community much more if they focus on the domain of their unique expertise.
+11. Use the Edit button. Take your time, and re-read and improve the wording and formatting to make your posts and comments as easy to understand as possible.
+    Avoid posting multiple comments in a row, as each comment generates a notification for the developers tagged in that issue. If you happened to post multiple comments in a row, and nobody followed up yet - consider merging those into one or a few comments while editing the combined content to be coherent.
+    If you choose to edit your older comments after others posted follow up comments you need to be aware that your modifications might not be noticed, so if it's not a typo fixing, try to write a new comment flagging that something has been changed in the previous comments.
+    For example, the very first comment is the most important one. If while the thread unfolds you realize that things aren't as they seemed to you originally you may want to edit the first post to reflect the up-to-date understanding of the issue at hand so that it helps those who read your issue in the future quickly understand what's going on and not need to sift through dozens of comments. It also helps to indicate that the post was edited. So, those reading the thread later can understand why there might be certain discontinuity in the information flow.
+    Use bullets and items if you have lists of items and the outcome improves overall readability.
+    Use backticks to refer to class and function names, e.g. `BartModel` and `generate` as these stand out and improve the speed of a reader's comprehension.
+    Try not to use italics and bold text too much as these often make the text more difficult to read.
+12. If you are cross-referencing a specific comment in a given thread or another issue, always link to that specific comment, rather than using the issue link. If you do the latter it could be quite impossible to find which specific comment you're referring to.
+    To get the link to the specific comment do not copy the url from the location bar of your browser, but instead, click the `...` icon in the upper right corner of the comment and then select "Copy Link".
+    For example the first link is a link to an issue, and the second to a specific comment in the same issue:
+    1. https://github.com/huggingface/transformers/issues/9257
+    2. https://github.com/huggingface/transformers/issues/9257#issuecomment-749945162
+13. If you are replying to a last comment, it's totally fine to make your reply with just your comment in it. The readers can follow the information flow here.
+    But if you're replying to a comment that happened some comments back it's always a good practice to quote just the relevant lines you're replying to. The `>` is used for quoting, or you can always use the menu to do so. For example your editor box will look like:
+    ```
+    > How big is your GPU cluster?
+    Our cluster is made of 256 GPUs.
+    ```
+    If you are addressing multiple comments, quote the relevant parts of each before your answer. Some people use the same comment to do multiple replies, others separate them into separate comments. Either way works. The latter approach helps for linking to a specific comment.
+In general the best way to figure out what works the best is to learn from issues posted by other people - see which issues get great responses and which get little to no response - observe what the posters who received great responses did differently from those who did not.
+Thank you for reading this somewhat lengthy document. We would like to conclude that these are not absolute rules, but friendly advice that will help maximize the chances for us to understand what you are trying to communicate, reproduce the problem, then resolve it to your satisfaction and the benefit of the whole community.
+If after reading this document there are remaining questions on how and why or there is a need for further elucidation, please, don't hesitate to ask your question in [this thread](https://discuss.huggingface.co/t/how-to-request-support/3128).
--- a/LICENSE
+++ b/LICENSE
+Copyright 2018- The Hugging Face team. All rights reserved.
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/Makefile
+++ b/Makefile
+# All targets below name actions rather than files they produce, so every one of them is declared phony.
+# Without this, a file or directory sharing a target's name (e.g. the `benchmark` directory) would make
+# the target look up to date and its recipe would be skipped.
+.PHONY: deps_table_update deps_table_check_updated autogenerate_code modified_only_fixup extra_style_checks repo-consistency quality style fixup fix-copies test test-examples benchmark test-sagemaker pre-release pre-patch post-release post-patch build-release
+# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
+export PYTHONPATH = src
+# Directories passed to ruff and to utils/get_modified_files.py by the targets below
+check_dirs := examples tests src utils scripts benchmark benchmark_v2
+# Value handed to ruff's --exclude option by the style targets; currently an empty string
+exclude_folders :=  ""
+# Lint-fix and format only the files reported by utils/get_modified_files.py for $(check_dirs).
+# When the current branch is `main`, the `style` target is run instead.
+# `$$` is make's escape for a literal `$`, so the shell (not make) expands those variables.
+modified_only_fixup:
+	@current_branch=$$(git branch --show-current); \
+	if [ "$$current_branch" = "main" ]; then \
+		echo "On main branch, running 'style' target instead..."; \
+		$(MAKE) style; \
+	else \
+		modified_py_files=$$(python utils/get_modified_files.py $(check_dirs)); \
+		if [ -n "$$modified_py_files" ]; then \
+			echo "Checking/fixing files: $${modified_py_files}"; \
+			ruff check $${modified_py_files} --fix --exclude $(exclude_folders); \
+			ruff format $${modified_py_files} --exclude $(exclude_folders); \
+		else \
+			echo "No library .py files were modified"; \
+		fi; \
+	fi
+# Update src/transformers/dependency_versions_table.py by running the `deps_table_update` command of setup.py
+deps_table_update:
+	@python setup.py deps_table_update
+# Fail if src/transformers/dependency_versions_table.py is out of date: record its checksum,
+# re-run the `deps_table_update` command of setup.py, then verify the checksum still matches.
+# The temporary checksum file md5sum.saved is removed on the failure path as well as on success,
+# so a failed check does not leave a stray file in the working tree.
+deps_table_check_updated:
+	@md5sum src/transformers/dependency_versions_table.py > md5sum.saved
+	@python setup.py deps_table_update
+	@md5sum -c --quiet md5sum.saved || (rm -f md5sum.saved; printf "\nError: the version dependency table is outdated.\nPlease run 'make fixup' or 'make style' and commit the changes.\n\n" && exit 1)
+	@rm md5sum.saved
+# Aggregate target for code-generation steps; at the moment its only prerequisite is deps_table_update
+autogenerate_code: deps_table_update
+# Check that the repo is in a good state
+# Runs the utils/ scripts below one after another; make stops at the first one that exits non-zero.
+repo-consistency:
+	python utils/check_copies.py
+	python utils/check_modular_conversion.py
+	python utils/check_dummies.py
+	python utils/check_repo.py
+	python utils/check_inits.py
+	python utils/check_pipeline_typing.py
+	python utils/check_config_docstrings.py
+	python utils/check_config_attributes.py
+	python utils/check_doctest_list.py
+	python utils/update_metadata.py --check-only
+	python utils/check_docstrings.py
+	python utils/add_dates.py
+# this target runs checks on all files
+# It first verifies that `from transformers import *` succeeds, then runs `ruff check` and
+# `ruff format --check` over $(check_dirs), setup.py and conftest.py, followed by the utils/ checkers.
+quality:
+	@python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
+	ruff check $(check_dirs) setup.py conftest.py
+	ruff format --check $(check_dirs) setup.py conftest.py
+	python utils/sort_auto_mappings.py --check_only
+	python utils/check_doc_toc.py
+	python utils/check_docstrings.py --check_all
+# Format source code automatically and check if there are any problems left that need manual fixing
+extra_style_checks:
+	python utils/sort_auto_mappings.py
+	python utils/check_doc_toc.py --fix_and_overwrite
+# Lint-fix and format everything in $(check_dirs) plus setup.py and conftest.py (files may be modified),
+# then run the autogenerate_code and extra_style_checks targets through sub-makes
+style:
+	ruff check $(check_dirs) setup.py conftest.py --fix --exclude $(exclude_folders)
+	ruff format $(check_dirs) setup.py conftest.py --exclude $(exclude_folders)
+	$(MAKE) autogenerate_code
+	$(MAKE) extra_style_checks
+# Super fast fix and check target that only works on relevant modified files since the branch was made
+# It has no recipe of its own: it only lists the four targets below as prerequisites
+fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
+# Make marked copies of snippets of code conform to the original
+# Each utils/ checker is invoked with --fix_and_overwrite
+fix-copies:
+	python utils/check_copies.py --fix_and_overwrite
+	python utils/check_modular_conversion.py --fix_and_overwrite
+	python utils/check_dummies.py --fix_and_overwrite
+	python utils/check_pipeline_typing.py --fix_and_overwrite
+	python utils/check_doctest_list.py --fix_and_overwrite
+	python utils/check_docstrings.py --fix_and_overwrite
+# Run tests for the library (pytest over ./tests/)
+test:
+	python -m pytest -n auto --dist=loadfile -s -v ./tests/
+# Run tests for examples (pytest over ./examples/pytorch/)
+test-examples:
+	python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/
+# Run benchmark
+# Invokes benchmark/benchmark.py with the `generation` config from benchmark/config for google/gemma-2b.
+# NOTE(review): the comma-separated option values are presumably expanded by --multirun - confirm.
+benchmark:
+	python3 benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=diff backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
+# Run tests for SageMaker DLC release
+# Sets TEST_SAGEMAKER=True in the environment of the pytest run over ./tests/sagemaker
+test-sagemaker: # install sagemaker dependencies in advance with pip install .[sagemaker]
+	TEST_SAGEMAKER=True python -m pytest -n auto  -s -v ./tests/sagemaker
+# Release stuff
+# The pre-/post- targets all call utils/release.py and differ only in the --patch and --post_release flags.
+pre-release:
+	python utils/release.py
+pre-patch:
+	python utils/release.py --patch
+post-release:
+	python utils/release.py --post_release
+post-patch:
+	python utils/release.py --post_release --patch
+# Delete dist/ and build/, build a wheel and a source distribution, then run utils/check_build.py
+build-release:
+	rm -rf dist
+	rm -rf build
+	python setup.py bdist_wheel
+	python setup.py sdist
+	python utils/check_build.py
--- a/README.md
+++ b/README.md
+# Bert-large infer
+### Fine-tuning BERT on SQuAD1.0
+The [`run_qa.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/question-answering/run_qa.py) script
+allows you to fine-tune any model from our [hub](https://huggingface.co/models) (as long as its architecture has a `ForQuestionAnswering` version in the library) on a question-answering dataset (such as SQuAD, or any other QA dataset available in the `datasets` library, or your own csv/jsonlines files) as long as they are structured the same way as SQuAD. You might need to tweak the data processing inside the script if your data is structured differently.
+**Note:** This script only works with models that have a fast tokenizer (backed by the 🤗 Tokenizers library) as it
+uses special features of those tokenizers. You can check if your favorite model has a fast tokenizer in
+[this table](https://huggingface.co/transformers/index.html#supported-frameworks); if it doesn't, you can still use the old version of the script, which can be found [here](https://github.com/huggingface/transformers/tree/main/examples/legacy/question-answering).
+Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along the flag `--version_2_with_negative`.
+- [train-v1.1.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json)
+- [dev-v1.1.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json)
+- [evaluate-v1.1.py](https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py)
+- This fine-tuned model is available as a checkpoint under the reference [`bert-large-uncased-whole-word-masking-finetuned-squad`](https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad).
+This example code fine-tunes BERT on the SQuAD1.0 dataset.
+```bash
+python /nx/transformers/examples/pytorch/question-answering/run_qa.py \
+  --model_name_or_path /models/google-bert/bert-large-uncased-whole-word-masking-finetuned-squad \
+  --dataset_name squad \
+  --do_train \
+  --do_eval \
+  --per_device_train_batch_size 12 \
+  --learning_rate 3e-5 \
+  --num_train_epochs 2 \
+  --max_seq_length 384 \
+  --doc_stride 128 \
+  --output_dir /nx/transformers/debug_squad/
+```
+Training with the previously defined hyper-parameters yields the following results:
+```bash
+f1 = 92.56
+exact_match = 85.74
+```
+### 多卡推理
+```
+bash fine-tuning_SQuAD1.0.sh
+```
+### result
--- a/SECURITY.md
+++ b/SECURITY.md
+# Security Policy
+## Hugging Face Hub, remote artefacts, and remote code
+Transformers is open-source software that is tightly coupled to the Hugging Face Hub. While you have the ability to use it
+offline with pre-downloaded model weights, it provides a very simple way to download, use, and manage models locally.
+When downloading artefacts that have been uploaded by others on any platform, you expose yourself to risks. Please
+read below for the security recommendations in order to keep your runtime and local environment safe.
+### Remote artefacts
+Models uploaded on the Hugging Face Hub come in different formats. We heavily recommend uploading and downloading
+models in the [`safetensors`](https://github.com/huggingface/safetensors) format (which is the default prioritized
+by the transformers library), as it was developed specifically to prevent arbitrary code execution on your system.
+To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
+### Remote code
+#### Modeling
+Transformers supports many model architectures, but is also the bridge between your Python runtime and models that
+are stored in model repositories on the Hugging Face Hub.
+These models require the `trust_remote_code=True` parameter to be set when using them; please **always** verify
+the content of the modeling files when using this argument. We recommend setting a revision in order to ensure you
+protect yourself from updates on the repository.
+## Reporting a Vulnerability
+Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software.
--- a/awesome-transformers.md
+++ b/awesome-transformers.md
+# Awesome projects built with Transformers
+This page lists awesome projects built on top of Transformers. Transformers is more than a toolkit to use pretrained
+models: it's a community of projects built around it and the Hugging Face Hub. We want Transformers to enable
+developers, researchers, students, professors, engineers, and anyone else to build their dream projects.
+In this list, we showcase incredibly impactful and novel projects that have pushed the field forward. We celebrate
+100 of these projects as we reach the milestone of 100k stars as a community; but we're very open to pull requests
+adding other projects to the list. If you believe a project should be here and it's not, then please, open a PR
+to add it.
+## [gpt4all](https://github.com/nomic-ai/gpt4all)
+[gpt4all](https://github.com/nomic-ai/gpt4all) is an ecosystem of open-source chatbots trained on massive collections of clean assistant data including code, stories and dialogue. It offers open-source, large language models such as LLaMA and GPT-J trained in an assistant-style.
+Keywords: Open-source, LLaMa, GPT-J, instruction, assistant
+## [recommenders](https://github.com/recommenders-team/recommenders)
+This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. It goes over several aspects required to build efficient recommendation systems: data preparation, modeling, evaluation, model selection & optimization, as well as operationalization.
+Keywords: Recommender systems, AzureML
+## [IOPaint](https://github.com/Sanster/IOPaint)
+Image inpainting tool powered by Stable Diffusion. Remove any unwanted object, defect, people from your pictures or erase and replace anything on your pictures.
+Keywords: inpainting, SD, Stable Diffusion
+## [flair](https://github.com/flairNLP/flair)
+FLAIR is a powerful PyTorch NLP framework, covering several important tasks: NER, sentiment-analysis, part-of-speech tagging, text and document embeddings, among other things.
+Keywords: NLP, text embedding, document embedding, biomedical, NER, PoS, sentiment-analysis
+## [mindsdb](https://github.com/mindsdb/mindsdb)
+MindsDB is a low-code ML platform, which automates and integrates several ML frameworks into the data stack as "AI Tables" to streamline the integration of AI into applications, making it accessible to developers of all skill levels.
+Keywords: Database, low-code, AI table
+## [langchain](https://github.com/langchain-ai/langchain)
+[langchain](https://github.com/langchain-ai/langchain) is aimed at assisting in the development of apps merging both LLMs and other sources of knowledge. The library allows chaining calls to applications, creating a sequence across many tools.
+Keywords: LLMs, Large Language Models, Agents, Chains
+## [LlamaIndex](https://github.com/run-llama/llama_index)
+[LlamaIndex](https://github.com/run-llama/llama_index) is a project that provides a central interface to connect your LLMs with external data. It provides various kinds of indices and retrieval mechanisms to perform different LLM tasks and obtain knowledge-augmented results.
+Keywords: LLMs, Large Language Models, Data Retrieval, Indices, Knowledge Augmentation
+## [ParlAI](https://github.com/facebookresearch/ParlAI)
+[ParlAI](https://github.com/facebookresearch/ParlAI) is a python framework for sharing, training and testing dialogue models, from open-domain chitchat, to task-oriented dialogue, to visual question answering. It provides more than 100 datasets under the same API, a large zoo of pretrained models, a set of agents, and has several integrations.
+Keywords: Dialogue, Chatbots, VQA, Datasets, Agents
+## [sentence-transformers](https://github.com/UKPLab/sentence-transformers)
+This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text is close and can efficiently be found using cosine similarity.
+Keywords: Dense vector representations, Text embeddings, Sentence embeddings
+## [ludwig](https://github.com/ludwig-ai/ludwig)
+Ludwig is a declarative machine learning framework that makes it easy to define machine learning pipelines using a simple and flexible data-driven configuration system. Ludwig is targeted at a wide variety of AI tasks. It provides a data-driven configuration system, training, prediction, and evaluation scripts, as well as a programmatic API.
+Keywords: Declarative, Data-driven, ML Framework
+## [InvokeAI](https://github.com/invoke-ai/InvokeAI)
+[InvokeAI](https://github.com/invoke-ai/InvokeAI) is an engine for Stable Diffusion models, aimed at professionals, artists, and enthusiasts. It leverages the latest AI-driven technologies through CLI as well as a WebUI.
+Keywords: Stable-Diffusion, WebUI, CLI
+## [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP)
+[PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP) is an easy-to-use and powerful NLP library particularly targeted at the Chinese languages. It has support for multiple pre-trained model zoos, and supports a wide-range of NLP tasks from research to industrial applications.
+Keywords: NLP, Chinese, Research, Industry
+## [stanza](https://github.com/stanfordnlp/stanza)
+The Stanford NLP Group's official Python NLP library. It contains support for running various accurate natural language processing tools on 60+ languages and for accessing the Java Stanford CoreNLP software from Python.
+Keywords: NLP, Multilingual, CoreNLP
+## [DeepPavlov](https://github.com/deeppavlov/DeepPavlov)
+[DeepPavlov](https://github.com/deeppavlov/DeepPavlov) is an open-source conversational AI library. It is designed for the development of production ready chat-bots and complex conversational systems, as well as research in the area of NLP and, particularly, of dialog systems.
+Keywords: Conversational, Chatbot, Dialog
+## [alpaca-lora](https://github.com/tloen/alpaca-lora)
+Alpaca-lora contains code for reproducing the Stanford Alpaca results using low-rank adaptation (LoRA). The repository provides training (fine-tuning) as well as generation scripts.
+Keywords: LoRA, Parameter-efficient fine-tuning
+## [imagen-pytorch](https://github.com/lucidrains/imagen-pytorch)
+An open-source Implementation of Imagen, Google's closed-source Text-to-Image Neural Network that beats DALL-E2. As of release, it is the new SOTA for text-to-image synthesis.
+Keywords: Imagen, Text-to-image
+## [adapters](https://github.com/adapter-hub/adapters)
+[adapters](https://github.com/adapter-hub/adapters) is an extension of HuggingFace's Transformers library, integrating adapters into state-of-the-art language models by incorporating AdapterHub, a central repository for pre-trained adapter modules. It is a drop-in replacement for transformers, which is regularly updated to stay up-to-date with the developments of transformers.
+Keywords: Adapters, LoRA, Parameter-efficient fine-tuning, Hub
+## [NeMo](https://github.com/NVIDIA/NeMo)
+NVIDIA [NeMo](https://github.com/NVIDIA/NeMo) is a conversational AI toolkit built for researchers working on automatic speech recognition (ASR), text-to-speech synthesis (TTS), large language models (LLMs), and natural language processing (NLP). The primary objective of [NeMo](https://github.com/NVIDIA/NeMo) is to help researchers from industry and academia to reuse prior work (code and pretrained models) and make it easier to create new [conversational AI models](https://developer.nvidia.com/conversational-ai#started).
+Keywords: Conversational, ASR, TTS, LLMs, NLP
+## [Runhouse](https://github.com/run-house/runhouse)
+[Runhouse](https://github.com/run-house/runhouse) allows you to send code and data to any of your compute or data infra, all in Python, and continue to interact with them normally from your existing code and environment. Runhouse developers mention:
+> Think of it as an expansion pack to your Python interpreter that lets it take detours to remote machines or manipulate remote data.
+Keywords: MLOps, Infrastructure, Data storage, Modeling
+## [MONAI](https://github.com/Project-MONAI/MONAI)
+[MONAI](https://github.com/Project-MONAI/MONAI) is a PyTorch-based, open-source framework for deep learning in healthcare imaging, part of PyTorch Ecosystem. Its ambitions are:
+- developing a community of academic, industrial and clinical researchers collaborating on a common foundation;
+- creating state-of-the-art, end-to-end training workflows for healthcare imaging;
+- providing researchers with the optimized and standardized way to create and evaluate deep learning models.
+Keywords: Healthcare imaging, Training, Evaluation
+## [simpletransformers](https://github.com/ThilinaRajapakse/simpletransformers)
+Simple Transformers lets you quickly train and evaluate Transformer models. Only 3 lines of code are needed to initialize, train, and evaluate a model. It supports a wide variety of NLP tasks.
+Keywords: Framework, simplicity, NLP
+## [JARVIS](https://github.com/microsoft/JARVIS)
+[JARVIS](https://github.com/microsoft/JARVIS) is a system attempting to merge LLMs such as GPT-4 with the rest of the open-source ML community: leveraging up to 60 downstream models in order to perform tasks identified by the LLM.
+Keywords: LLM, Agents, HF Hub
+## [transformers.js](https://github.com/huggingface/transformers.js/)
+[transformers.js](https://github.com/huggingface/transformers.js/) is a JavaScript library targeted at running models from transformers directly within the browser.
+Keywords: Transformers, JavaScript, browser
+## [bumblebee](https://github.com/elixir-nx/bumblebee)
+Bumblebee provides pre-trained Neural Network models on top of Axon, a neural networks library for the Elixir language. It includes integration with 🤗 Models, allowing anyone to download and perform Machine Learning tasks with few lines of code.
+Keywords: Elixir, Axon
+## [argilla](https://github.com/argilla-io/argilla)
+Argilla is an open-source platform providing advanced NLP labeling, monitoring, and workspaces. It is compatible with many open source ecosystems such as Hugging Face, Stanza, FLAIR, and others.
+Keywords: NLP, Labeling, Monitoring, Workspaces
+## [haystack](https://github.com/deepset-ai/haystack)
+Haystack is an open source NLP framework to interact with your data using Transformer models and LLMs. It offers production-ready tools to quickly build complex decision making, question answering, semantic search, text generation applications, and more.
+Keywords: NLP, Framework, LLM
+## [spaCy](https://github.com/explosion/spaCy)
+[spaCy](https://github.com/explosion/spaCy) is a library for advanced Natural Language Processing in Python and Cython. It's built on the very latest research, and was designed from day one to be used in real products. It offers support for transformers models through its third party package, spacy-transformers.
+Keywords: NLP, Framework
+## [speechbrain](https://github.com/speechbrain/speechbrain)
+SpeechBrain is an open-source and all-in-one conversational AI toolkit based on PyTorch.
+The goal is to create a single, flexible, and user-friendly toolkit that can be used to easily develop state-of-the-art speech technologies, including systems for speech recognition, speaker recognition, speech enhancement, speech separation, language identification, multi-microphone signal processing, and many others.
+Keywords: Conversational, Speech
+## [skorch](https://github.com/skorch-dev/skorch)
+Skorch is a scikit-learn compatible neural network library that wraps PyTorch. It has support for models within transformers, and tokenizers from tokenizers.
+Keywords: Scikit-Learn, PyTorch
+## [bertviz](https://github.com/jessevig/bertviz)
+BertViz is an interactive tool for visualizing attention in Transformer language models such as BERT, GPT2, or T5. It can be run inside a Jupyter or Colab notebook through a simple Python API that supports most Huggingface models.
+Keywords: Visualization, Transformers
+## [mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax)
+[mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax) is a haiku library using the xmap/pjit operators in JAX for model parallelism of transformers. This library is designed for scalability up to approximately 40B parameters on TPUv3s. It was the library used to train the GPT-J model.
+Keywords: Haiku, Model parallelism, LLM, TPU
+## [deepchem](https://github.com/deepchem/deepchem)
+DeepChem aims to provide a high quality open-source toolchain that democratizes the use of deep-learning in drug discovery, materials science, quantum chemistry, and biology.
+Keywords: Drug discovery, Materials Science, Quantum Chemistry, Biology
+## [OpenNRE](https://github.com/thunlp/OpenNRE)
+An Open-Source Package for Neural Relation Extraction (NRE). It is targeted at a wide range of users, from newcomers to relation extraction, to developers, researchers, or students.
+Keywords: Neural Relation Extraction, Framework
+## [pycorrector](https://github.com/shibing624/pycorrector)
+PyCorrector is a Chinese Text Error Correction Tool. It uses a language model to detect errors, and pinyin and shape features to correct Chinese text errors. It can be used for Chinese Pinyin and stroke input methods.
+Keywords: Chinese, Error correction tool, Language model, Pinyin
+## [nlpaug](https://github.com/makcedward/nlpaug)
+This python library helps you with augmenting nlp for machine learning projects. It is a lightweight library featuring synthetic data generation for improving model performance, support for audio and text, and compatibility with several ecosystems (scikit-learn, pytorch, tensorflow).
+Keywords: Data augmentation, Synthetic data generation, Audio, NLP
+## [dream-textures](https://github.com/carson-katri/dream-textures)
+[dream-textures](https://github.com/carson-katri/dream-textures) is a library targeted at bringing stable-diffusion support within Blender. It supports several use-cases, such as image generation, texture projection, inpainting/outpainting, ControlNet, and upscaling.
+Keywords: Stable-Diffusion, Blender
+## [seldon-core](https://github.com/SeldonIO/seldon-core)
+Seldon core converts your ML models (Tensorflow, Pytorch, H2o, etc.) or language wrappers (Python, Java, etc.) into production REST/GRPC microservices.
+Seldon handles scaling to thousands of production machine learning models and provides advanced machine learning capabilities out of the box including Advanced Metrics, Request Logging, Explainers, Outlier Detectors, A/B Tests, Canaries and more.
+Keywords: Microservices, Modeling, Language wrappers
+## [open_model_zoo](https://github.com/openvinotoolkit/open_model_zoo)
+This repository includes optimized deep learning models and a set of demos to expedite development of high-performance deep learning inference applications. Use these free pre-trained models instead of training your own models to speed-up the development and production deployment process.
+Keywords: Optimized models, Demos
+## [ml-stable-diffusion](https://github.com/apple/ml-stable-diffusion)
+ML-Stable-Diffusion is a repository by Apple bringing Stable Diffusion support to Core ML, on Apple Silicon devices. It supports stable diffusion checkpoints hosted on the Hugging Face Hub.
+Keywords: Stable Diffusion, Apple Silicon, Core ML
+## [stable-dreamfusion](https://github.com/ashawkey/stable-dreamfusion)
+Stable-Dreamfusion is a pytorch implementation of the text-to-3D model Dreamfusion, powered by the Stable Diffusion text-to-2D model.
+Keywords: Text-to-3D, Stable Diffusion
+## [txtai](https://github.com/neuml/txtai)
+[txtai](https://github.com/neuml/txtai) is an open-source platform for semantic search and workflows powered by language models. txtai builds embeddings databases, which are a union of vector indexes and relational databases enabling similarity search with SQL. Semantic workflows connect language models together into unified applications.
+Keywords: Semantic search, LLM
+## [djl](https://github.com/deepjavalibrary/djl)
+Deep Java Library (DJL) is an open-source, high-level, engine-agnostic Java framework for deep learning. DJL is designed to be easy to get started with and simple to use for developers. DJL provides a native Java development experience and functions like any other regular Java library. DJL offers [a Java binding](https://github.com/deepjavalibrary/djl/tree/master/extensions/tokenizers) for HuggingFace Tokenizers and an easy conversion toolkit for deploying HuggingFace models in Java.
+Keywords: Java, Framework
+## [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/)
+This project provides a unified framework to test generative language models on a large number of different evaluation tasks. It has support for more than 200 tasks, and supports different ecosystems: HF Transformers, GPT-NeoX, DeepSpeed, as well as the OpenAI API.
+Keywords: LLM, Evaluation, Few-shot
+## [gpt-neox](https://github.com/EleutherAI/gpt-neox)
+This repository records EleutherAI's library for training large-scale language models on GPUs. The framework is based on NVIDIA's Megatron Language Model and has been augmented with techniques from DeepSpeed as well as some novel optimizations. It is focused on training multi-billion-parameter models.
+Keywords: Training, LLM, Megatron, DeepSpeed
+## [muzic](https://github.com/microsoft/muzic)
+Muzic is a research project on AI music that empowers music understanding and generation with deep learning and artificial intelligence. Muzic was created by researchers from Microsoft Research Asia.
+Keywords: Music understanding, Music generation
+## [dalle-flow](https://github.com/jina-ai/dalle-flow)
+DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. It leverages DALL·E-Mega, GLID-3 XL, and Stable Diffusion to generate image candidates, and then calls CLIP-as-service to rank the candidates w.r.t. the prompt.
+The preferred candidate is fed to GLID-3 XL for diffusion, which often enriches the texture and background. Finally, the candidate is upscaled to 1024x1024 via SwinIR.
+Keywords: High-definition image generation, Stable Diffusion, DALL-E Mega, GLID-3 XL, CLIP, SwinIR
+## [lightseq](https://github.com/bytedance/lightseq)
+LightSeq is a high performance training and inference library for sequence processing and generation implemented in CUDA. It enables highly efficient computation of modern NLP and CV models such as BERT, GPT, Transformer, etc. It is therefore most useful for machine translation, text generation, image classification, and other sequence related tasks.
+Keywords: Training, Inference, Sequence Processing, Sequence Generation
+## [LaTeX-OCR](https://github.com/lukas-blecher/LaTeX-OCR)
+The goal of this project is to create a learning based system that takes an image of a math formula and returns corresponding LaTeX code.
+Keywords: OCR, LaTeX, Math formula
+## [open_clip](https://github.com/mlfoundations/open_clip)
+OpenCLIP is an open source implementation of OpenAI's CLIP.
+The goal of this repository is to enable training models with contrastive image-text supervision, and to investigate their properties such as robustness to distribution shift.
+The starting point is an implementation of CLIP that matches the accuracy of the original CLIP models when trained on the same dataset.
+Specifically, a ResNet-50 model trained with this codebase on OpenAI's 15 million image subset of YFCC achieves 32.7% top-1 accuracy on ImageNet.
+Keywords: CLIP, Open-source, Contrastive, Image-text
+## [dalle-playground](https://github.com/saharmor/dalle-playground)
+A playground to generate images from any text prompt using Stable Diffusion and Dall-E mini.
+Keywords: WebUI, Stable Diffusion, Dall-E mini
+## [FedML](https://github.com/FedML-AI/FedML)
+[FedML](https://github.com/FedML-AI/FedML) is a federated learning and analytics library enabling secure and collaborative machine learning on decentralized data anywhere at any scale.
+It supports large-scale cross-silo federated learning, and cross-device federated learning on smartphones/IoTs, and research simulation.
+Keywords: Federated Learning, Analytics, Collaborative ML, Decentralized
+## [gpt-code-clippy](https://github.com/CodedotAl/gpt-code-clippy)
+GPT-Code-Clippy (GPT-CC) is an open source version of GitHub Copilot, a language model -- based on GPT-3, called GPT-Codex -- that is fine-tuned on publicly available code from GitHub.
+Keywords: LLM, Code
+## [TextAttack](https://github.com/QData/TextAttack)
+[TextAttack](https://github.com/QData/TextAttack) 🐙 is a Python framework for adversarial attacks, data augmentation, and model training in NLP.
+Keywords: Adversarial attacks, Data augmentation, NLP
+## [OpenPrompt](https://github.com/thunlp/OpenPrompt)
+Prompt-learning is a paradigm for adapting pre-trained language models (PLMs) to downstream NLP tasks: it modifies the input text with a textual template and directly uses PLMs to conduct pre-trained tasks. This library provides a standard, flexible and extensible framework to deploy the prompt-learning pipeline. [OpenPrompt](https://github.com/thunlp/OpenPrompt) supports loading PLMs directly from https://github.com/huggingface/transformers.
+## [text-generation-webui](https://github.com/oobabooga/text-generation-webui/)
+[text-generation-webui](https://github.com/oobabooga/text-generation-webui/) is a Gradio Web UI for running Large Language Models like LLaMA, llama.cpp, GPT-J, Pythia, OPT, and GALACTICA.
+Keywords: LLM, WebUI
+## [libra](https://github.com/Palashio/libra)
+An ergonomic machine learning library for non-technical users. It focuses on ergonomics and on ensuring that training a model is as simple as it can be.
+Keywords: Ergonomic, Non-technical
+## [alibi](https://github.com/SeldonIO/alibi)
+Alibi is an open source Python library aimed at machine learning model inspection and interpretation. The focus of the library is to provide high-quality implementations of black-box, white-box, local and global explanation methods for classification and regression models.
+Keywords: Model inspection, Model interpretation, Black-box, White-box
+## [tortoise-tts](https://github.com/neonbjb/tortoise-tts)
+Tortoise is a text-to-speech program built with the following priorities: strong multi-voice capabilities, and highly realistic prosody and intonation.
+Keywords: Text-to-speech
+## [flower](https://github.com/adap/flower)
+Flower (flwr) is a framework for building federated learning systems. The design of Flower is based on a few guiding principles: customizability, extendability, framework agnosticity, and ease-of-use.
+Keywords: Federated learning systems, Customizable, Extendable, Framework-agnostic, Simplicity
+## [fast-bert](https://github.com/utterworks/fast-bert)
+Fast-Bert is a deep learning library that allows developers and data scientists to train and deploy BERT and XLNet based models for natural language processing tasks beginning with Text Classification. It is aimed at simplicity.
+Keywords: Deployment, BERT, XLNet
+## [towhee](https://github.com/towhee-io/towhee)
+Towhee makes it easy to build neural data processing pipelines for AI applications. We provide hundreds of models, algorithms, and transformations that can be used as standard pipeline building blocks. Users can use Towhee's Pythonic API to build a prototype of their pipeline and automatically optimize it for production-ready environments.
+Keywords: Data processing pipeline, Optimization
+## [alibi-detect](https://github.com/SeldonIO/alibi-detect)
+Alibi Detect is an open source Python library focused on outlier, adversarial and drift detection. The package aims to cover both online and offline detectors for tabular data, text, images and time series. Both TensorFlow and PyTorch backends are supported for drift detection.
+Keywords: Adversarial, Outlier, Drift detection
+## [FARM](https://github.com/deepset-ai/FARM)
+[FARM](https://github.com/deepset-ai/FARM) makes Transfer Learning with BERT & Co simple, fast and enterprise-ready. It's built upon transformers and provides additional features to simplify the life of developers: Parallelized preprocessing, highly modular design, multi-task learning, experiment tracking, easy debugging and close integration with AWS SageMaker.
+Keywords: Transfer Learning, Modular design, Multi-task learning, Experiment tracking
+## [aitextgen](https://github.com/minimaxir/aitextgen)
+A robust Python tool for text-based AI training and generation using OpenAI's GPT-2 and EleutherAI's GPT Neo/GPT-3 architecture.
+[aitextgen](https://github.com/minimaxir/aitextgen) is a Python package that leverages PyTorch, Hugging Face Transformers and pytorch-lightning with specific optimizations for text generation using GPT-2, plus many added features.
+Keywords: Training, Generation
+## [diffgram](https://github.com/diffgram/diffgram)
+Diffgram aims to integrate human supervision into platforms. We support your team programmatically changing the UI (Schema, layout, etc.) like in Streamlit. This means that you can collect and annotate timely data from users. In other words, we are the platform behind your platform, an integrated part of your application, to ship new & better AI products faster.
+Keywords: Human supervision, Platform
+## [ecco](https://github.com/jalammar/ecco)
+Explain, analyze, and visualize NLP language models. Ecco creates interactive visualizations directly in Jupyter notebooks explaining the behavior of Transformer-based language models (like GPT2, BERT, RoBERTa, T5, and T0).
+Keywords: Model explainability
+## [s3prl](https://github.com/s3prl/s3prl)
+[s3prl](https://github.com/s3prl/s3prl) stands for Self-Supervised Speech Pre-training and Representation Learning. Self-supervised speech pre-trained models are called upstream in this toolkit, and are utilized in various downstream tasks.
+Keywords: Speech, Training
+## [ru-dalle](https://github.com/ai-forever/ru-dalle)
+RuDALL-E aims to be similar to DALL-E, targeted to Russian.
+Keywords: DALL-E, Russian
+## [DeepKE](https://github.com/zjunlp/DeepKE)
+[DeepKE](https://github.com/zjunlp/DeepKE) is a knowledge extraction toolkit for knowledge graph construction supporting cnSchema, low-resource, document-level and multimodal scenarios for entity, relation and attribute extraction.
+Keywords: Knowledge Extraction, Knowledge Graphs
+## [Nebuly](https://github.com/nebuly-ai/optimate)
+Nebuly is the next-generation platform to monitor and optimize your AI costs in one place. The platform connects to all your AI cost sources (compute, API providers, AI software licenses, etc) and centralizes them in one place to give you full visibility on a model basis. The platform also provides optimization recommendations and a co-pilot model that can guide during the optimization process. The platform builds on top of the open-source tools allowing you to optimize the different steps of your AI stack to squeeze out the best possible cost performances.
+Keywords: Optimization, Performance, Monitoring
+## [imaginAIry](https://github.com/brycedrennan/imaginAIry)
+Offers a CLI and a Python API to generate images with Stable Diffusion. It has support for many tools, like image structure control (controlnet), instruction-based image edits (InstructPix2Pix), prompt-based masking (clipseg), among others.
+Keywords: Stable Diffusion, CLI, Python API
+## [sparseml](https://github.com/neuralmagic/sparseml)
+SparseML is an open-source model optimization toolkit that enables you to create inference-optimized sparse models using pruning, quantization, and distillation algorithms. Models optimized with SparseML can then be exported to the ONNX and deployed with DeepSparse for GPU-class performance on CPU hardware.
+Keywords: Model optimization, Pruning, Quantization, Distillation
+## [opacus](https://github.com/pytorch/opacus)
+Opacus is a library that enables training PyTorch models with differential privacy. It supports training with minimal code changes required on the client, has little impact on training performance, and allows the client to online track the privacy budget expended at any given moment.
+Keywords: Differential privacy
+## [LAVIS](https://github.com/salesforce/LAVIS)
+[LAVIS](https://github.com/salesforce/LAVIS) is a Python deep learning library for LAnguage-and-VISion intelligence research and applications. This library aims to provide engineers and researchers with a one-stop solution to rapidly develop models for their specific multimodal scenarios, and benchmark them across standard and customized datasets. It features a unified interface design to access state-of-the-art language-vision models and common datasets.
+Keywords: Multimodal, NLP, Vision
+## [buzz](https://github.com/chidiwilliams/buzz)
+Buzz transcribes and translates audio offline on your personal computer. Powered by OpenAI's Whisper.
+Keywords: Audio transcription, Translation
+## [rust-bert](https://github.com/guillaume-be/rust-bert)
+Rust-native state-of-the-art Natural Language Processing models and pipelines. Port of Hugging Face's Transformers library, using the tch-rs crate and pre-processing from rust-tokenizers. Supports multi-threaded tokenization and GPU inference. This repository exposes the model base architecture, task-specific heads and ready-to-use pipelines.
+Keywords: Rust, BERT, Inference
+## [EasyNLP](https://github.com/alibaba/EasyNLP)
+[EasyNLP](https://github.com/alibaba/EasyNLP) is an easy-to-use NLP development and application toolkit in PyTorch, first released inside Alibaba in 2021. It is built with scalable distributed training strategies and supports a comprehensive suite of NLP algorithms for various NLP applications. [EasyNLP](https://github.com/alibaba/EasyNLP) integrates knowledge distillation and few-shot learning for landing large pre-trained models, together with various popular multi-modality pre-trained models. It provides a unified framework of model training, inference, and deployment for real-world applications.
+Keywords: NLP, Knowledge distillation, Few-shot learning, Multi-modality, Training, Inference, Deployment
+## [TurboTransformers](https://github.com/Tencent/TurboTransformers)
+A fast and user-friendly runtime for transformer inference (Bert, Albert, GPT2, Decoders, etc) on CPU and GPU.
+Keywords: Optimization, Performance
+## [hivemind](https://github.com/learning-at-home/hivemind)
+Hivemind is a PyTorch library for decentralized deep learning across the Internet. Its intended usage is training one large model on hundreds of computers from different universities, companies, and volunteers.
+Keywords: Decentralized training
+## [docquery](https://github.com/impira/docquery)
+DocQuery is a library and command-line tool that makes it easy to analyze semi-structured and unstructured documents (PDFs, scanned images, etc.) using large language models (LLMs). You simply point DocQuery at one or more documents and specify a question you want to ask. DocQuery is created by the team at Impira.
+Keywords: Semi-structured documents, Unstructured documents, LLM, Document Question Answering
+## [CodeGeeX](https://github.com/THUDM/CodeGeeX)
+[CodeGeeX](https://github.com/THUDM/CodeGeeX) is a large-scale multilingual code generation model with 13 billion parameters, pre-trained on a large code corpus of more than 20 programming languages. It has several unique features:
+- Multilingual code generation
+- Crosslingual code translation
+- Is a customizable programming assistant
+Keywords: Code Generation Model
+## [ktrain](https://github.com/amaiya/ktrain)
+[ktrain](https://github.com/amaiya/ktrain) is a lightweight wrapper for the deep learning library TensorFlow Keras (and other libraries) to help build, train, and deploy neural networks and other machine learning models. Inspired by ML framework extensions like fastai and ludwig, [ktrain](https://github.com/amaiya/ktrain) is designed to make deep learning and AI more accessible and easier to apply for both newcomers and experienced practitioners.
+Keywords: Keras wrapper, Model building, Training, Deployment
+## [FastDeploy](https://github.com/PaddlePaddle/FastDeploy)
+[FastDeploy](https://github.com/PaddlePaddle/FastDeploy) is an easy-to-use and high-performance AI model deployment toolkit for Cloud, Mobile and Edge with an out-of-the-box and unified experience and end-to-end optimization for 160+ Text, Vision, Speech and Cross-modal AI models. It covers image classification, object detection, OCR, face detection, matting, pp-tracking, NLP, stable diffusion, TTS and other tasks to meet developers' industrial deployment needs for multi-scenario, multi-hardware and multi-platform.
+Keywords: Model deployment, Cloud, Mobile, Edge
+## [underthesea](https://github.com/undertheseanlp/underthesea)
+[underthesea](https://github.com/undertheseanlp/underthesea) is a Vietnamese NLP toolkit. Underthesea is a suite of open source Python modules, data sets and tutorials supporting research and development in Vietnamese Natural Language Processing. We provide an extremely easy API to quickly apply pretrained NLP models to your Vietnamese text, such as word segmentation, part-of-speech tagging (PoS), named entity recognition (NER), text classification and dependency parsing.
+Keywords: Vietnamese, NLP
+## [hasktorch](https://github.com/hasktorch/hasktorch)
+Hasktorch is a library for tensors and neural networks in Haskell. It is an independent open source community project which leverages the core C++ libraries shared by PyTorch.
+Keywords: Haskell, Neural Networks
+## [donut](https://github.com/clovaai/donut)
+Donut, or Document understanding transformer, is a new method of document understanding that utilizes an OCR-free end-to-end Transformer model.
+Donut does not require off-the-shelf OCR engines/APIs, yet it shows state-of-the-art performances on various visual document understanding tasks, such as visual document classification or information extraction (a.k.a. document parsing).
+Keywords: Document Understanding
+## [transformers-interpret](https://github.com/cdpierse/transformers-interpret)
+Transformers Interpret is a model explainability tool designed to work exclusively with the transformers package.
+In line with the philosophy of the Transformers package, Transformers Interpret allows any transformers model to be explained in just two lines. Explainers are available for both text and computer vision models. Visualizations are also available in notebooks and as savable png and html files.
+Keywords: Model interpretation, Visualization
+## [mlrun](https://github.com/mlrun/mlrun)
+MLRun is an open MLOps platform for quickly building and managing continuous ML applications across their lifecycle. MLRun integrates into your development and CI/CD environment and automates the delivery of production data, ML pipelines, and online applications, significantly reducing engineering efforts, time to production, and computation resources. With MLRun, you can choose any IDE on your local machine or on the cloud. MLRun breaks the silos between data, ML, software, and DevOps/MLOps teams, enabling collaboration and fast continuous improvements.
+Keywords: MLOps
+## [FederatedScope](https://github.com/alibaba/FederatedScope)
+[FederatedScope](https://github.com/alibaba/FederatedScope) is a comprehensive federated learning platform that provides convenient usage and flexible customization for various federated learning tasks in both academia and industry. Based on an event-driven architecture, [FederatedScope](https://github.com/alibaba/FederatedScope) integrates rich collections of functionalities to satisfy the burgeoning demands from federated learning, and aims to build up an easy-to-use platform for promoting learning safely and effectively.
+Keywords: Federated learning, Event-driven
+## [pythainlp](https://github.com/PyThaiNLP/pythainlp)
+PyThaiNLP is a Python package for text processing and linguistic analysis, similar to NLTK with focus on Thai language.
+Keywords: Thai, NLP, NLTK
+## [FlagAI](https://github.com/FlagAI-Open/FlagAI)
+[FlagAI](https://github.com/FlagAI-Open/FlagAI) (Fast LArge-scale General AI models) is a fast, easy-to-use and extensible toolkit for large-scale models. Our goal is to support training, fine-tuning, and deployment of large-scale models on various downstream tasks with multi-modality.
+Keywords: Large models, Training, Fine-tuning, Deployment, Multi-modal
+## [pyserini](https://github.com/castorini/pyserini)
+[pyserini](https://github.com/castorini/pyserini) is a Python toolkit for reproducible information retrieval research with sparse and dense representations. Retrieval using sparse representations is provided via integration with the group's Anserini IR toolkit. Retrieval using dense representations is provided via integration with Facebook's Faiss library.
+Keywords: IR, Information Retrieval, Dense, Sparse
+## [baal](https://github.com/baal-org/baal)
+[baal](https://github.com/baal-org/baal) is an active learning library that supports both industrial applications and research usecases. [baal](https://github.com/baal-org/baal) currently supports Monte-Carlo Dropout, MCDropConnect, deep ensembles, and semi-supervised learning.
+Keywords: Active Learning, Research, Labeling
+## [cleanlab](https://github.com/cleanlab/cleanlab)
+[cleanlab](https://github.com/cleanlab/cleanlab) is the standard data-centric AI package for data quality and machine learning with messy, real-world data and labels. For text, image, tabular, audio (among others) datasets, you can use cleanlab to automatically: detect data issues (outliers, label errors, near duplicates, etc), train robust ML models, infer consensus + annotator-quality for multi-annotator data, suggest data to (re)label next (active learning).
+Keywords: Data-Centric AI, Data Quality, Noisy Labels, Outlier Detection, Active Learning  
+## [BentoML](https://github.com/bentoml/BentoML)
+[BentoML](https://github.com/bentoml) is the unified framework for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models.
+All Hugging Face models and pipelines can be seamlessly integrated into BentoML applications, enabling the running of models on the most suitable hardware and independent scaling based on usage.
+Keywords: BentoML, Framework, Deployment, AI Applications
+## [LLaMA Factory](https://github.com/hiyouga/LLaMA-Factory)
+[LLaMA Factory](https://github.com/hiyouga/LLaMA-Factory) offers a user-friendly fine-tuning framework that incorporates PEFT. The repository includes training (fine-tuning) and inference examples for LLaMA-2, BLOOM, Falcon, Baichuan, Qwen, and other LLMs. A ChatGLM version is also available in [ChatGLM-Efficient-Tuning](https://github.com/hiyouga/ChatGLM-Efficient-Tuning).
+Keywords: PEFT, fine-tuning, LLaMA-2, ChatGLM, Qwen
--- a/benchmark/.gitignore
+++ b/benchmark/.gitignore
+benchmark_results/
\ No newline at end of file
--- a/benchmark/README.md
+++ b/benchmark/README.md
+# Benchmarks
+You might want to add new benchmarks.
+You will need to define a python function named `run_benchmark` in your python file and the file must be located in this `benchmark/` directory.
+The expected function signature is the following:
+```py
+def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
+```
+## Writing metrics to the database
+`MetricsRecorder` is thread-safe, in the sense of the python [`Thread`](https://docs.python.org/3/library/threading.html#threading.Thread). This means you can start a background thread to do the readings on the device measurements while not blocking the main thread to execute the model measurements.
+See [`llama.py`](./llama.py) for an example of this in practice.
+```py
+from benchmarks_entrypoint import MetricsRecorder
+import psycopg2
+def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str, num_tokens_to_generate=100):
+  metrics_recorder = MetricsRecorder(psycopg2.connect("dbname=metrics"), logger, branch, commit_id, commit_msg)
+  benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
+    # To collect device measurements
+    metrics_recorder.collect_device_measurements(
+        benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
+    )
+    # To collect your model measurements
+    metrics_recorder.collect_model_measurements(
+        benchmark_id,
+        {
+            "model_load_time": model_load_time,
+            "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
+            "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
+            "first_eager_generate_time_secs": first_eager_generate_time,
+            "second_eager_generate_time_secs": second_eager_generate_time,
+            "time_to_first_token_secs": time_to_first_token,
+            "time_to_second_token_secs": time_to_second_token,
+            "time_to_third_token_secs": time_to_third_token,
+            "time_to_next_token_mean_secs": mean_time_to_next_token,
+            "first_compile_generate_time_secs": first_compile_generate_time,
+            "second_compile_generate_time_secs": second_compile_generate_time,
+            "third_compile_generate_time_secs": third_compile_generate_time,
+            "fourth_compile_generate_time_secs": fourth_compile_generate_time,
+        },
+    )
+```
--- a/benchmark/__init__.py
+++ b/benchmark/__init__.py
--- a/benchmark/benches/llama.py
+++ b/benchmark/benches/llama.py
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import sys
+from logging import Logger
+from threading import Event, Thread
+from time import perf_counter, sleep
+# Add the parent directory to Python path to import benchmarks_entrypoint
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import gpustat
+import psutil
+import psycopg2
+from benchmarks_entrypoint import MetricsRecorder
+# Optional heavy ML dependencies - only required when actually running the benchmark
+try:
+    import torch
+    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
+    TRANSFORMERS_AVAILABLE = True
+except ImportError:
+    TRANSFORMERS_AVAILABLE = False
+    torch = None
+    AutoModelForCausalLM = None
+    AutoTokenizer = None
+    GenerationConfig = None
+    StaticCache = None
+os.environ["HF_XET_HIGH_PERFORMANCE"] = "1"  # NOTE(review): presumably opts into a faster hf_xet transfer mode -- confirm
+os.environ["TOKENIZERS_PARALLELISM"] = "1"  # run_benchmark later overrides this to "false" before compiling
+# Only set torch precision if torch is available (torch is None when the guarded import failed)
+if TRANSFORMERS_AVAILABLE:
+    torch.set_float32_matmul_precision("high")
+def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
+    p = psutil.Process(os.getpid())
+    while not continue_metric_collection.is_set():
+        with p.oneshot():
+            cpu_util = p.cpu_percent()
+            mem_megabytes = p.memory_info().rss / (1024 * 1024)
+        gpu_stats = gpustat.GPUStatCollection.new_query()
+        gpu_util = gpu_stats[0]["utilization.gpu"]
+        gpu_mem_megabytes = gpu_stats[0]["memory.used"]
+        metrics_recorder.collect_device_measurements(
+            benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
+        )
+        sleep(0.01)
+def run_benchmark(
+    logger: Logger,
+    repository: str,
+    branch: str,
+    commit_id: str,
+    commit_msg: str,
+    metrics_recorder=None,
+    num_tokens_to_generate=100,
+):
+    # Check if required ML dependencies are available
+    if not TRANSFORMERS_AVAILABLE:
+        logger.error("Transformers and torch are required to run the LLaMA benchmark. Please install them with:")
+        logger.error("pip install torch transformers")
+        logger.error("Skipping LLaMA benchmark due to missing dependencies.")
+        return
+    continue_metric_collection = Event()
+    metrics_thread = None
+    model_id = "meta-llama/Llama-2-7b-hf"
+    # If no metrics_recorder is provided, create one for backward compatibility
+    if metrics_recorder is None:
+        try:
+            metrics_recorder = MetricsRecorder(
+                psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg, True
+            )
+            should_close_recorder = True
+        except Exception as e:
+            logger.error(f"Failed to create metrics recorder: {e}")
+            return
+    else:
+        should_close_recorder = False
+    try:
+        gpu_stats = gpustat.GPUStatCollection.new_query()
+        gpu_name = gpu_stats[0]["name"]
+        benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
+        logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
+        metrics_thread = Thread(
+            target=collect_metrics,
+            args=[benchmark_id, continue_metric_collection, metrics_recorder],
+        )
+        metrics_thread.start()
+        logger.info("started background thread to fetch device metrics")
+        os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling
+        device = "cuda"
+        logger.info("downloading weights")
+        # This is to avoid counting download in model load time measurement
+        model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)
+        gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
+        logger.info("loading model")
+        start = perf_counter()
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id, dtype=torch.float16, generation_config=gen_config
+        ).eval()
+        model.to(device)
+        torch.cuda.synchronize()
+        end = perf_counter()
+        model_load_time = end - start
+        logger.info(f"loaded model in: {model_load_time}s")
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        prompt = "Why dogs are so cute?"
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)
+        # Specify the max length (including both the prompt and the response)
+        # When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object
+        # with sequence length = `max_length`. The longer the more you will re-use it
+        seq_length = inputs["input_ids"].shape[1]
+        model.generation_config.max_length = seq_length + num_tokens_to_generate
+        batch_size = inputs["input_ids"].shape[0]
+        # Copied from the gpt-fast repo
+        def multinomial_sample_one_no_sync(probs_sort):  # Does multinomial sampling without a cuda synchronization
+            q = torch.empty_like(probs_sort).exponential_(1)
+            return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
+        def logits_to_probs(logits, temperature: float = 1.0, top_k: int | None = None):
+            logits = logits / max(temperature, 1e-5)
+            if top_k is not None:
+                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+                pivot = v.select(-1, -1).unsqueeze(-1)
+                logits = torch.where(logits < pivot, -float("Inf"), logits)
+            probs = torch.nn.functional.softmax(logits, dim=-1)
+            return probs
+        def sample(logits, temperature: float = 1.0, top_k: int | None = None):
+            probs = logits_to_probs(logits[0, -1], temperature, top_k)
+            idx_next = multinomial_sample_one_no_sync(probs)
+            return idx_next, probs
+        # First eager forward pass
+        logger.info("running first eager forward pass")
+        start = perf_counter()
+        _ = model(**inputs)
+        torch.cuda.synchronize()
+        end = perf_counter()
+        first_eager_fwd_pass_time = end - start
+        logger.info(f"completed first eager forward pass in: {first_eager_fwd_pass_time}s")
+        # Second eager forward pass (should be faster)
+        logger.info("running second eager forward pass")
+        start = perf_counter()
+        _ = model(**inputs)
+        torch.cuda.synchronize()
+        end = perf_counter()
+        second_eager_fwd_pass_time = end - start
+        logger.info(f"completed second eager forward pass in: {second_eager_fwd_pass_time}s")
+        # First eager generation
+        logger.info("running first eager generation")
+        start = perf_counter()
+        output = model.generate(**inputs)
+        torch.cuda.synchronize()
+        end = perf_counter()
+        first_eager_generate_time = end - start
+        logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
+        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+        # Second eager generation (should be faster)
+        logger.info("running second eager generation")
+        start = perf_counter()
+        output = model.generate(**inputs)
+        torch.cuda.synchronize()
+        end = perf_counter()
+        second_eager_generate_time = end - start
+        logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
+        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+        logger.info("running generation timing loop")
+        input_pos = torch.arange(0, seq_length, device=device)
+        inputs = inputs["input_ids"]
+        start = perf_counter()
+        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
+            logits = model(inputs, position_ids=input_pos).logits
+        next_token, probs = sample(logits, temperature=0.6, top_k=5)
+        torch.cuda.synchronize()
+        end = perf_counter()
+        time_to_first_token = end - start
+        input_pos = torch.tensor([seq_length], device=device, dtype=torch.int)
+        next_token = next_token.clone()
+        start = perf_counter()
+        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
+            logits = model(next_token, position_ids=input_pos).logits
+        next_token, probs = sample(logits, temperature=0.6, top_k=5)
+        torch.cuda.synchronize()
+        end = perf_counter()
+        time_to_second_token = end - start
+        input_pos = torch.tensor([seq_length + 1], device=device, dtype=torch.int)
+        next_token = next_token.clone()
+        start = perf_counter()
+        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
+            logits = model(next_token, position_ids=input_pos).logits
+        next_token, probs = sample(logits, temperature=0.6, top_k=5)
+        torch.cuda.synchronize()
+        end = perf_counter()
+        time_to_third_token = end - start
+        logger.info("running longer generation timing loop")
+        total_time = 0
+        for i in range(20):
+            input_pos = torch.tensor([seq_length + 2 + i], device=device, dtype=torch.int)
+            next_token = next_token.clone()
+            start = perf_counter()
+            with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
+                logits = model(next_token, position_ids=input_pos).logits
+            next_token, probs = sample(logits, temperature=0.6, top_k=5)
+            torch.cuda.synchronize()
+            end = perf_counter()
+            total_time += end - start
+        mean_time_to_next_token = total_time / 20
+        logger.info("running compilation benchmarks")
+        # Now compile the model
+        model = torch.compile(model, mode="max-autotune", fullgraph=True)
+        # StaticCache for generation
+        with torch.device(device):
+            model.setup_caches(max_batch_size=batch_size, max_seq_len=seq_length + num_tokens_to_generate)
+        input_pos = torch.arange(0, seq_length, device=device)
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"]
+        logger.info("compiling model")
+        model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16, generation_config=gen_config)
+        model.to(device)
+        model = torch.compile(model, mode="max-autotune", fullgraph=True)
+        past_key_values = StaticCache(
+            model.config,
+            max_batch_size=batch_size,
+            device=device,
+            dtype=torch.float16,
+            max_cache_len=seq_length + 128,
+        )
+        # 1st call
+        start = perf_counter()
+        output = model.generate(**inputs, past_key_values=past_key_values)
+        end = perf_counter()
+        first_compile_generate_time = end - start
+        logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
+        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+        past_key_values = StaticCache(
+            model.config,
+            max_batch_size=batch_size,
+            device=device,
+            dtype=torch.float16,
+            max_cache_len=seq_length + 128,
+        )
+        # 2nd call
+        start = perf_counter()
+        output = model.generate(**inputs, past_key_values=past_key_values)
+        end = perf_counter()
+        second_compile_generate_time = end - start
+        logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
+        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+        past_key_values = StaticCache(
+            model.config,
+            max_batch_size=batch_size,
+            device=device,
+            dtype=torch.float16,
+            max_cache_len=seq_length + 128,
+        )
+        # 3rd call
+        start = perf_counter()
+        output = model.generate(**inputs, past_key_values=past_key_values)
+        end = perf_counter()
+        third_compile_generate_time = end - start
+        logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
+        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+        past_key_values = StaticCache(
+            model.config,
+            max_batch_size=batch_size,
+            device=device,
+            dtype=torch.float16,
+            max_cache_len=seq_length + 128,
+        )
+        # 4th call
+        start = perf_counter()
+        output = model.generate(**inputs, past_key_values=past_key_values)
+        end = perf_counter()
+        fourth_compile_generate_time = end - start
+        logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
+        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
+        metrics_recorder.collect_model_measurements(
+            benchmark_id,
+            {
+                "model_load_time": model_load_time,
+                "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
+                "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
+                "first_eager_generate_time_secs": first_eager_generate_time,
+                "second_eager_generate_time_secs": second_eager_generate_time,
+                "time_to_first_token_secs": time_to_first_token,
+                "time_to_second_token_secs": time_to_second_token,
+                "time_to_third_token_secs": time_to_third_token,
+                "time_to_next_token_mean_secs": mean_time_to_next_token,
+                "first_compile_generate_time_secs": first_compile_generate_time,
+                "second_compile_generate_time_secs": second_compile_generate_time,
+                "third_compile_generate_time_secs": third_compile_generate_time,
+                "fourth_compile_generate_time_secs": fourth_compile_generate_time,
+            },
+        )
+    except Exception as e:
+        logger.error(f"Caught exception: {e}")
+    continue_metric_collection.set()
+    if metrics_thread is not None:
+        metrics_thread.join()
+    # Only close the recorder if we created it locally
+    if should_close_recorder:
+        metrics_recorder.close()
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Run benchmark using the `optimum-benchmark` library with some customization in `transformers`.
+Assuming the current directory is the `transformers` root directory (and that the given commits are valid commits):
+```bash
+python benchmark/benchmark.py --config-dir benchmark/config --config-name generation --commit=9b9c7f03da625b13643e99205c691fe046461724 --metrics=decode.latency.mean,per_token.latency.mean,per_token.throughput.value backend.model=google/gemma-2b benchmark.input_shapes.sequence_length=5,7 benchmark.input_shapes.batch_size=1,2 --multirun
+```
+"""
+import argparse
+import glob
+import json
+import os.path
+import re
+import tempfile
+from contextlib import contextmanager
+from pathlib import Path
+from git import Repo
+from huggingface_hub import HfApi
+from optimum_benchmark import Benchmark
+from optimum_benchmark_wrapper import main
+PATH_TO_REPO = Path(__file__).parent.parent.resolve()
+@contextmanager
+def checkout_commit(repo: Repo, commit_id: str):
+    """
+    Temporarily check out `commit_id` in `repo`, then go back to whatever was checked out before.
+    The previous checkout (the branch reference, or the commit itself when HEAD is detached) is restored when
+    the `with` block exits, including when it exits because of an exception.
+    Args:
+        repo (`git.Repo`): A git repository (for instance the Transformers repo).
+        commit_id (`str`): The commit reference to check out for the duration of the context.
+    """
+    head = repo.head
+    # A detached HEAD has no branch reference, so remember the commit itself in that case.
+    if head.is_detached:
+        original_ref = head.commit
+    else:
+        original_ref = head.ref
+    try:
+        repo.git.checkout(commit_id)
+        yield
+    finally:
+        repo.git.checkout(original_ref)
+def _resolve_metric(report_dict, metric):
+    """Follow the dot-separated `metric` path inside `report_dict`; return `(found, value)`."""
+    value = report_dict
+    for key in metric.split("."):
+        # A missing key, or a path that continues below a leaf value, means the metric does not exist.
+        if not isinstance(value, dict) or key not in value:
+            return False, None
+        value = value[key]
+    return True, value
+def _store_nested(target, keys, value):
+    """Store `value` in `target` under the nested path `keys`, creating or reusing intermediate dicts."""
+    for key in keys[:-1]:
+        child = target.get(key)
+        if not isinstance(child, dict):
+            child = target[key] = {}
+        target = child
+    target[keys[-1]] = value
+def summarize(run_dir, metrics, expand_metrics=False):
+    """Produce a summary for each optimum-benchmark launched job's output directory found in `run_dir`.
+    Each summary's format is as follows (for `expand_metrics=False`):
+    ```
+    {
+        "model": "google/gemma-2b",
+        "commit": "3cd6ed22e4d49219f300f5055e71e3929aba20d7",
+        "config": "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5",
+        "metrics": {
+            "decode.latency.mean": 1.624666809082031,
+            "per_token.latency.mean": 0.012843788806628804,
+            "per_token.throughput.value": 77.85864553330948
+        }
+    }
+    ```
+    With `expand_metrics=True`, `"metrics"` is a nested dict instead (`{"decode": {"latency": {"mean": ...}}}`);
+    metrics that share a prefix are merged under the same parent dict.
+    Args:
+        run_dir (`str`): Directory searched recursively for `benchmark_report.json` files.
+        metrics (`list[str]`): Dot-separated paths of the report values to include in each summary.
+        expand_metrics (`bool`, defaults to `False`): Whether to nest the metrics instead of keying them by path.
+    Returns:
+        `list[dict]`: One summary per job directory. Each summary is also printed and written to `summary.json`
+        inside its job directory. Job directories without a `commit=<ref>` path component or without a
+        `benchmark.json` file are skipped, and metrics missing from a report are left out with a printed warning.
+    """
+    reports = glob.glob(os.path.join(run_dir, "**/benchmark_report.json"), recursive=True)
+    report_dirs = [str(Path(report).parent) for report in reports]
+    summaries = []
+    for report_dir in report_dirs:
+        commit_match = re.search(r"/commit=([^/]+)", report_dir)
+        if commit_match is None:
+            print(f"Warning: no `commit=<ref>` component in {report_dir}; skipping it.")
+            continue
+        commit = commit_match.group(1)
+        benchmark_file = os.path.join(report_dir, "benchmark.json")
+        if not os.path.isfile(benchmark_file):
+            continue
+        benchmark = Benchmark.from_json(benchmark_file)
+        report = benchmark.report
+        model = benchmark.config.backend["model"]
+        # This looks like `benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5`.
+        # (we rely on the usage of hydra's `${hydra.job.override_dirname}`.)
+        # The model name is escaped so that it is matched literally rather than as a regular expression.
+        benchmark_name = re.sub(re.escape(f"backend.model={model}") + ",*", "", report_dir)
+        benchmark_name = str(Path(benchmark_name).parts[-1])
+        if benchmark_name.startswith("commit="):
+            benchmark_name = benchmark.config.name
+        # post-processing of report: show a few selected/important metric
+        report_dict = report.to_dict()
+        metrics_values = {}
+        for metric in metrics:
+            found, value = _resolve_metric(report_dict, metric)
+            if not found:
+                # Most likely a typo in a user-specified metric: report it instead of storing a partial value.
+                print(f"Warning: metric `{metric}` not found in the report of {report_dir}; skipping it.")
+                continue
+            if expand_metrics:
+                _store_nested(metrics_values, metric.split("."), value)
+            else:
+                metrics_values[metric] = value
+        # show some config information
+        print(f"model: {model}")
+        print(f"commit: {commit}")
+        print(f"config: {benchmark_name}")
+        if len(metrics_values) > 0:
+            print("metrics:")
+            if expand_metrics:
+                print(metrics_values)
+            else:
+                for metric, value in metrics_values.items():
+                    print(f"  - {metric}: {value}")
+        print("-" * 80)
+        summary = {
+            "model": model,
+            "commit": commit,
+            "config": benchmark_name,
+            "metrics": metrics_values,
+        }
+        summaries.append(summary)
+        with open(os.path.join(report_dir, "summary.json"), "w") as fp:
+            json.dump(summary, fp, indent=4)
+    return summaries
+def combine_summaries(summaries, output_dir=None):
+    """Combine a list of summary obtained from the function `summarize`.
+    The combined summary's format is as follows:
+    ```
+    "google/gemma-2b": {
+        "benchmark.input_shapes.batch_size=1,benchmark.input_shapes.sequence_length=5": {
+            "3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
+                "metrics": {"decode.latency.mean": 1.624666809082031}
+            },
+            "c97ee28b117c0abe8e08891f402065e4df6d72aa": {
+                "metrics": {"decode.latency.mean": 1.6278163452148438}
+            }
+        },
+        "benchmark.input_shapes.batch_size=2,benchmark.input_shapes.sequence_length=5": {
+            "3cd6ed22e4d49219f300f5055e71e3929aba20d7": {
+                "metrics": {"decode.latency.mean": 1.6947791748046876}
+            },
+            "c97ee28b117c0abe8e08891f402065e4df6d72aa": {
+                "metrics": {
+                    "decode.latency.mean": 1.6980519409179688}
+            }
+        }
+    }
+    ```
+    Args:
+        summaries (`list[dict]`): Summaries with the keys `"model"`, `"config"`, `"commit"` and `"metrics"`.
+        output_dir (`str`, *optional*): Directory in which `summary.json` is written. When omitted, the
+            module-level `exp_run_dir` set by the command-line entry point is used if it exists; when neither is
+            available, nothing is written to disk.
+    Returns:
+        `dict`: The combined summary, nested as model -> config -> commit. It is also printed as JSON.
+    """
+    combined = {}
+    for summary in summaries:
+        per_config = combined.setdefault(summary["model"], {}).setdefault(summary["config"], {})
+        # The first summary seen for a given (model, config, commit) wins; later duplicates are ignored.
+        per_config.setdefault(summary["commit"], {"metrics": summary["metrics"]})
+    if output_dir is None:
+        # Backward compatibility: `exp_run_dir` only exists when this file is run as a script.
+        output_dir = globals().get("exp_run_dir")
+    if output_dir is not None:
+        with open(os.path.join(output_dir, "summary.json"), "w") as fp:
+            json.dump(combined, fp, indent=4)
+    print(json.dumps(combined, indent=4))
+    return combined
+# Command-line entry point: run the benchmark on each requested commit/model, then summarize and optionally upload.
+if __name__ == "__main__":
+    def list_str(values):
+        """Split a comma-separated command-line value into a list of strings."""
+        return values.split(",")
+    def str_to_bool(value):
+        """
+        Parse a boolean command-line value.
+        `type=bool` cannot be used for this: argparse would call `bool()` on the raw string, so every non-empty
+        value (including "False") would be read as `True`.
+        """
+        normalized = value.strip().lower()
+        if normalized in ("true", "1", "yes", "y", "on"):
+            return True
+        # The empty string stays falsy, as it was with `type=bool`.
+        if normalized in ("", "false", "0", "no", "n", "off"):
+            return False
+        raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config-dir", type=str, required=True, help="The path to the config directory.")
+    parser.add_argument("--config-name", type=str, required=True, help="The config name.")
+    # arguments specific to this wrapper for our own customization
+    parser.add_argument("--ensure_empty", type=str_to_bool, default=True, help="If to create a temporary directory.")
+    parser.add_argument(
+        "--commit",
+        type=list_str,
+        default="",
+        help="Comma-separated list of branch names and/or commit sha values on which the benchmark will run. If `diff` is specified, it will run on both the current head and the `main` branch.",
+    )
+    parser.add_argument("--metrics", type=str, help="The metrics to be included in the summary.")
+    parser.add_argument("--repo_id", type=str, default=None, help="The repository to which the file will be uploaded.")
+    parser.add_argument("--path_in_repo", type=str, default=None, help="Relative filepath in the repo.")
+    parser.add_argument("--token", type=str, default=None, help="A valid user access token (string).")
+    # Everything argparse does not recognise is forwarded to optimum-benchmark.
+    args, optimum_benchmark_args = parser.parse_known_args()
+    repo = Repo(PATH_TO_REPO)
+    metrics = [
+        "prefill.latency.mean",
+        "prefill.throughput.value",
+        "decode.latency.mean",
+        "decode.throughput.value",
+        "per_token.latency.mean",
+        "per_token.throughput.value",
+    ]
+    if args.metrics is not None:
+        metrics = args.metrics.split(",")
+    # Get `backend.model` in a hacky way: We want to control the experiment flow manually.
+    models = [""]
+    for arg in optimum_benchmark_args:
+        if arg.startswith("backend.model="):
+            models = arg[len("backend.model=") :].split(",")
+            break
+    optimum_benchmark_args = [arg for arg in optimum_benchmark_args if not arg.startswith("backend.model=")]
+    # Get the commit(s)
+    current_head = str(repo.head.commit) if repo.head.is_detached else str(repo.head.ref)
+    commits = [x for x in args.commit if x != ""]
+    if len(commits) == 0:
+        commits = [current_head]
+    elif len(commits) == 1 and commits[0] == "diff":
+        # compare to `main`
+        commits = ["main", current_head]
+    # Get the specified run directory
+    run_dir_arg_idx, run_dir = -1, None
+    sweep_dir_arg_idx, sweep_dir = -1, None
+    for idx, arg in enumerate(optimum_benchmark_args):
+        if arg.startswith("hydra.run.dir="):
+            run_dir = arg[len("hydra.run.dir=") :]
+            run_dir_arg_idx = idx
+        elif arg.startswith("hydra.sweep.dir="):
+            sweep_dir = arg[len("hydra.sweep.dir=") :]
+            sweep_dir_arg_idx = idx
+    # With `--multirun` the sweep directory is the experiment directory, otherwise the run directory is.
+    exp_run_dir, arg_idx, arg_name = (
+        (sweep_dir, sweep_dir_arg_idx, "hydra.sweep.dir")
+        if "--multirun" in optimum_benchmark_args
+        else (run_dir, run_dir_arg_idx, "hydra.run.dir")
+    )
+    # TODO: not hardcoded
+    if exp_run_dir is None and args.ensure_empty:
+        exp_run_dir = "_benchmark"
+    if args.ensure_empty:
+        # Run inside a fresh temporary sub-directory so that earlier results are never mixed in.
+        os.makedirs(exp_run_dir, exist_ok=True)
+        exp_run_dir = tempfile.mkdtemp(dir=exp_run_dir)
+    run_summaries = []
+    for commit in commits:
+        with checkout_commit(repo, commit):
+            # Replace the user-supplied reference by the commit that is now checked out.
+            commit = str(repo.head.commit)
+            commit_run_dir = exp_run_dir
+            if exp_run_dir is not None:
+                # `=` is backslash-escaped in the override value handed to hydra.
+                commit_run_dir = os.path.join(exp_run_dir, rf"commit\={commit}")
+            print(f"Run benchmark on commit: {commit}")
+            for model in models:
+                model_arg = [f"backend.model={model}"] if model != "" else []
+                dir_args = []
+                if commit_run_dir is not None:
+                    if arg_idx > -1:
+                        # The user passed the directory override: rewrite it in place for this commit.
+                        optimum_benchmark_args[arg_idx] = f"{arg_name}={commit_run_dir}"
+                    else:
+                        dir_args = [
+                            f"hydra.sweep.dir={commit_run_dir}",
+                            f"hydra.run.dir={commit_run_dir}/" + "${hydra.job.override_dirname}",
+                        ]
+                main(args.config_dir, args.config_name, model_arg + dir_args + optimum_benchmark_args)
+            if commit_run_dir is not None:
+                # Need to remove the `\` character
+                summaries = summarize(commit_run_dir.replace("\\", ""), metrics)
+                run_summaries.extend(summaries)
+    # aggregate the information across the commits
+    if exp_run_dir is not None:
+        with open(os.path.join(exp_run_dir, "summaries.json"), "w") as fp:
+            json.dump(run_summaries, fp, indent=4)
+        combined_summary = combine_summaries(run_summaries)
+        if args.repo_id is not None and args.path_in_repo is not None:
+            # Upload to Hub
+            api = HfApi()
+            api.upload_folder(
+                folder_path=exp_run_dir,
+                path_in_repo=args.path_in_repo,
+                repo_id=args.repo_id,
+                repo_type="dataset",
+                token=args.token,
+            )
--- a/benchmark/benchmarks_entrypoint.py
+++ b/benchmark/benchmarks_entrypoint.py
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import importlib.util
+import json
+import logging
+import os
+import sys
+import uuid
+from datetime import datetime
+import pandas as pd
+# psycopg2 is an optional dependency. When it can be imported, `dict` values are registered to be adapted
+# through `Json` and PSYCOPG2_AVAILABLE is set to True; when the import fails, the flag is set to False.
+try:
+    from psycopg2.extensions import register_adapter
+    from psycopg2.extras import Json
+    register_adapter(dict, Json)
+    PSYCOPG2_AVAILABLE = True
+except ImportError:
+    PSYCOPG2_AVAILABLE = False
+class ImportModuleException(Exception):
+    """Raised when a Python module cannot be loaded from a file path."""
+class MetricsRecorder:
+    """
+    Records benchmark metadata, device measurements and model measurements.
+    Each record is inserted through the database connection when one is given, and/or appended to in-memory
+    pandas DataFrames when `collect_csv_data` is enabled. The DataFrames are written out by `export_to_csv`.
+    """
+    def __init__(
+        self,
+        connection,
+        logger: logging.Logger,
+        repository: str,
+        branch: str,
+        commit_id: str,
+        commit_msg: str,
+        collect_csv_data: bool = True,
+    ):
+        """
+        Args:
+            connection: Database connection used for the INSERT statements, or `None` to disable database storage.
+                Its `autocommit` attribute is set to `True`.
+            logger (`logging.Logger`): Logger that receives the progress messages.
+            repository (`str`): Repository name stored with every benchmark.
+            branch (`str`): Branch name stored with every benchmark.
+            commit_id (`str`): Commit hash stored with every benchmark.
+            commit_msg (`str`): Commit message stored with every benchmark.
+            collect_csv_data (`bool`, defaults to `True`): Whether to also keep every record in pandas DataFrames.
+        """
+        self.conn = connection
+        self.use_database = connection is not None
+        if self.use_database:
+            self.conn.autocommit = True
+        self.logger = logger
+        self.repository = repository
+        self.branch = branch
+        self.commit_id = commit_id
+        self.commit_msg = commit_msg
+        self.collect_csv_data = collect_csv_data
+        # For CSV export - store all data in pandas DataFrames (only if CSV collection is enabled)
+        if self.collect_csv_data:
+            # Initialize empty DataFrames with proper schemas
+            self.benchmarks_df = pd.DataFrame(
+                columns=[
+                    "benchmark_id",
+                    "repository",
+                    "branch",
+                    "commit_id",
+                    "commit_message",
+                    "metadata",
+                    "created_at",
+                ]
+            )
+            self.device_measurements_df = pd.DataFrame(
+                columns=["benchmark_id", "cpu_util", "mem_megabytes", "gpu_util", "gpu_mem_megabytes", "time"]
+            )
+            self.model_measurements_df = pd.DataFrame(
+                columns=[
+                    "benchmark_id",
+                    "time",
+                    "model_load_time",
+                    "first_eager_forward_pass_time_secs",
+                    "second_eager_forward_pass_time_secs",
+                    "first_eager_generate_time_secs",
+                    "second_eager_generate_time_secs",
+                    "time_to_first_token_secs",
+                    "time_to_second_token_secs",
+                    "time_to_third_token_secs",
+                    "time_to_next_token_mean_secs",
+                    "first_compile_generate_time_secs",
+                    "second_compile_generate_time_secs",
+                    "third_compile_generate_time_secs",
+                    "fourth_compile_generate_time_secs",
+                ]
+            )
+        else:
+            self.benchmarks_df = None
+            self.device_measurements_df = None
+            self.model_measurements_df = None
+    def initialise_benchmark(self, metadata: dict[str, str]) -> str:
+        """
+        Creates a new benchmark, returns the benchmark id (UUID)
+        Args:
+            metadata (`dict[str, str]`): Metadata stored with the benchmark (JSON-encoded in the CSV data).
+        """
+        # Generate a unique UUID for this benchmark
+        benchmark_id = str(uuid.uuid4())
+        if self.use_database:
+            with self.conn.cursor() as cur:
+                cur.execute(
+                    "INSERT INTO benchmarks (benchmark_id, repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s, %s)",
+                    (benchmark_id, self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
+                )
+        # Store benchmark data for CSV export (if enabled)
+        if self.collect_csv_data:
+            # Add row to pandas DataFrame
+            new_row = pd.DataFrame(
+                [
+                    {
+                        "benchmark_id": benchmark_id,
+                        "repository": self.repository,
+                        "branch": self.branch,
+                        "commit_id": self.commit_id,
+                        "commit_message": self.commit_msg,
+                        "metadata": json.dumps(metadata),
+                        "created_at": datetime.utcnow().isoformat(),
+                    }
+                ]
+            )
+            self.benchmarks_df = pd.concat([self.benchmarks_df, new_row], ignore_index=True)
+        mode_info = []
+        if self.use_database:
+            mode_info.append("database")
+        if self.collect_csv_data:
+            mode_info.append("CSV")
+        mode_str = " + ".join(mode_info) if mode_info else "no storage"
+        # Logged exactly once per benchmark, whatever the storage mode.
+        self.logger.debug(f"initialised benchmark #{benchmark_id} ({mode_str} mode)")
+        return benchmark_id
+    def collect_device_measurements(self, benchmark_id: str, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
+        """
+        Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
+        Args:
+            benchmark_id (`str`): Id of the benchmark the measurements belong to.
+            cpu_util: CPU utilisation value to record.
+            mem_megabytes: Memory usage value to record.
+            gpu_util: GPU utilisation value to record.
+            gpu_mem_megabytes: GPU memory usage value to record.
+        """
+        # Store device measurements for CSV export (if enabled)
+        if self.collect_csv_data:
+            # Add row to pandas DataFrame
+            new_row = pd.DataFrame(
+                [
+                    {
+                        "benchmark_id": benchmark_id,
+                        "cpu_util": cpu_util,
+                        "mem_megabytes": mem_megabytes,
+                        "gpu_util": gpu_util,
+                        "gpu_mem_megabytes": gpu_mem_megabytes,
+                        "time": datetime.utcnow().isoformat(),
+                    }
+                ]
+            )
+            self.device_measurements_df = pd.concat([self.device_measurements_df, new_row], ignore_index=True)
+        # Store in database if available
+        if self.use_database:
+            with self.conn.cursor() as cur:
+                cur.execute(
+                    "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
+                    (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
+                )
+        self.logger.debug(
+            f"collected device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
+        )
+    def collect_model_measurements(self, benchmark_id: str, measurements: dict[str, float]):
+        """
+        Record the model measurements of a benchmark.
+        Args:
+            benchmark_id (`str`): Id of the benchmark the measurements belong to.
+            measurements (`dict[str, float]`): Measurement name to value. Each entry becomes a column of the CSV
+                row, while the database receives the dict as a single value.
+        """
+        # Store model measurements for CSV export (if enabled)
+        if self.collect_csv_data:
+            # Add row to pandas DataFrame with flattened measurements
+            row_data = {"benchmark_id": benchmark_id, "time": datetime.utcnow().isoformat()}
+            # Flatten the measurements dict into the row
+            row_data.update(measurements)
+            new_row = pd.DataFrame([row_data])
+            self.model_measurements_df = pd.concat([self.model_measurements_df, new_row], ignore_index=True)
+        # Store in database if available
+        if self.use_database:
+            with self.conn.cursor() as cur:
+                cur.execute(
+                    """
+                    INSERT INTO model_measurements (
+                        benchmark_id,
+                        measurements
+                    ) VALUES (%s, %s)
+                    """,
+                    (
+                        benchmark_id,
+                        measurements,
+                    ),
+                )
+        self.logger.debug(f"collected model measurements for benchmark #{benchmark_id}: {measurements}")
+    def export_to_csv(self, output_dir: str = "benchmark_results"):
+        """
+        Export all collected data to CSV files using pandas DataFrames
+        Args:
+            output_dir (`str`, defaults to `"benchmark_results"`): Directory receiving the CSV files. It is
+                created when missing. Nothing is written when CSV collection is disabled.
+        """
+        if not self.collect_csv_data:
+            self.logger.warning("CSV data collection is disabled - no CSV files will be generated")
+            return
+        # `exist_ok=True` avoids failing when the directory appears between the check and the creation.
+        directory_existed = os.path.exists(output_dir)
+        os.makedirs(output_dir, exist_ok=True)
+        if not directory_existed:
+            self.logger.info(f"Created output directory: {output_dir}")
+        # All files of one export share the same timestamp suffix.
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        files_created = []
+        # Export using pandas DataFrames
+        self._export_pandas_data(output_dir, timestamp, files_created)
+        self.logger.info(f"CSV export complete! Created {len(files_created)} files in {output_dir}")
+    def _export_pandas_data(self, output_dir: str, timestamp: str, files_created: list):
+        """
+        Export CSV files using pandas DataFrames
+        Writes the benchmarks, device measurements, model measurements and summary files into `output_dir`
+        and appends each written path to `files_created`.
+        """
+        # Export benchmarks
+        benchmarks_file = os.path.join(output_dir, f"benchmarks_{timestamp}.csv")
+        self.benchmarks_df.to_csv(benchmarks_file, index=False)
+        files_created.append(benchmarks_file)
+        self.logger.info(f"Exported {len(self.benchmarks_df)} benchmark records to {benchmarks_file}")
+        # Export device measurements
+        device_file = os.path.join(output_dir, f"device_measurements_{timestamp}.csv")
+        self.device_measurements_df.to_csv(device_file, index=False)
+        files_created.append(device_file)
+        self.logger.info(f"Exported {len(self.device_measurements_df)} device measurement records to {device_file}")
+        # Export model measurements (already flattened)
+        model_file = os.path.join(output_dir, f"model_measurements_{timestamp}.csv")
+        self.model_measurements_df.to_csv(model_file, index=False)
+        files_created.append(model_file)
+        self.logger.info(f"Exported {len(self.model_measurements_df)} model measurement records to {model_file}")
+        # Create comprehensive summary using pandas operations
+        summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.csv")
+        self._create_summary(summary_file)
+        files_created.append(summary_file)
+    def _create_summary(self, summary_file: str):
+        """
+        Create a comprehensive summary CSV using pandas operations
+        The summary has one row per benchmark joined with its model measurements, plus per-benchmark
+        aggregates (mean / max / std and a sample count) of the device measurements.
+        """
+        if len(self.benchmarks_df) == 0:
+            # Create empty summary file
+            summary_df = pd.DataFrame()
+            summary_df.to_csv(summary_file, index=False)
+            self.logger.info(f"Created empty benchmark summary at {summary_file}")
+            return
+        # Start with benchmarks as the base
+        summary_df = self.benchmarks_df.copy()
+        # Add model measurements (join on benchmark_id)
+        if len(self.model_measurements_df) > 0:
+            # Drop 'time' column from model measurements to avoid conflicts
+            model_df = self.model_measurements_df.drop(columns=["time"], errors="ignore")
+            summary_df = summary_df.merge(model_df, on="benchmark_id", how="left")
+        # Calculate device measurement aggregates using pandas groupby
+        if len(self.device_measurements_df) > 0:
+            device_agg = (
+                self.device_measurements_df.groupby("benchmark_id")
+                .agg(
+                    {
+                        "cpu_util": ["mean", "max", "std", "count"],
+                        "mem_megabytes": ["mean", "max", "std"],
+                        "gpu_util": ["mean", "max", "std"],
+                        "gpu_mem_megabytes": ["mean", "max", "std"],
+                    }
+                )
+                .round(3)
+            )
+            # Flatten column names
+            device_agg.columns = [f"{col[0]}_{col[1]}" for col in device_agg.columns]
+            device_agg = device_agg.reset_index()
+            # Rename count column to be more descriptive
+            if "cpu_util_count" in device_agg.columns:
+                device_agg = device_agg.rename(columns={"cpu_util_count": "device_measurement_count"})
+            # Merge with summary
+            summary_df = summary_df.merge(device_agg, on="benchmark_id", how="left")
+        # Export the comprehensive summary
+        summary_df.to_csv(summary_file, index=False)
+        self.logger.info(f"Created comprehensive benchmark summary with {len(summary_df)} records at {summary_file}")
+    def close(self):
+        """Close the database connection, if there is one."""
+        if self.use_database and self.conn:
+            self.conn.close()
+# Module-level logger: messages at INFO level and above are written to stdout as "[LEVEL - timestamp] message".
+formatter = logging.Formatter("[%(levelname)s - %(asctime)s] %(message)s")
+handler = logging.StreamHandler(sys.stdout)
+handler.setLevel(logging.INFO)
+handler.setFormatter(formatter)
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+logger.addHandler(handler)
+def parse_arguments() -> tuple[str, str, str, str, bool, str]:
+    """
+    Read the options of the benchmarking CLI from the command line.
+    Returns:
+        The tuple `(repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir)`. `generate_csv` is
+        `True` only when `--csv` is passed, and `csv_output_dir` defaults to `"benchmark_results"`.
+    """
+    cli = argparse.ArgumentParser(description="CLI for benchmarking the huggingface/transformers.")
+    # Positional arguments, in the order in which they must be given on the command line.
+    positional_help = {
+        "repository": "The repository name on which the benchmarking is performed.",
+        "branch": "The branch name on which the benchmarking is performed.",
+        "commit_id": "The commit hash on which the benchmarking is performed.",
+        "commit_msg": "The commit message associated with the commit, truncated to 70 characters.",
+    }
+    for argument_name, help_text in positional_help.items():
+        cli.add_argument(argument_name, type=str, help=help_text)
+    # CSV output is opt-in.
+    cli.add_argument("--csv", action="store_true", default=False, help="Enable CSV output files generation.")
+    cli.add_argument(
+        "--csv-output-dir",
+        type=str,
+        default="benchmark_results",
+        help="Directory for CSV output files (default: benchmark_results).",
+    )
+    parsed = cli.parse_args()
+    return (
+        parsed.repository,
+        parsed.branch,
+        parsed.commit_id,
+        parsed.commit_msg,
+        parsed.csv,
+        parsed.csv_output_dir,
+    )
+def import_from_path(module_name, file_path):
+    """
+    Load the Python file at `file_path` as a module registered under `module_name` in `sys.modules`.
+    Args:
+        module_name (`str`): Name under which the module is registered.
+        file_path (`str`): Path of the Python source file to execute.
+    Returns:
+        The loaded module.
+    Raises:
+        ImportModuleException: If the module cannot be created or its code raises while executing. The original
+            exception is chained as the cause, and `sys.modules[module_name]` is put back to its previous state.
+    """
+    previous_module = sys.modules.get(module_name)
+    try:
+        spec = importlib.util.spec_from_file_location(module_name, file_path)
+        module = importlib.util.module_from_spec(spec)
+        # The module is registered before execution so that it can be looked up by name while it runs.
+        sys.modules[module_name] = module
+        spec.loader.exec_module(module)
+        return module
+    except Exception as e:
+        # Do not leave a partially-initialised module behind for later imports to pick up.
+        if previous_module is not None:
+            sys.modules[module_name] = previous_module
+        else:
+            sys.modules.pop(module_name, None)
+        raise ImportModuleException(f"failed to load python module: {e}") from e
+def create_database_connection():
+    """
+    Try to create a database connection. Returns None if connection fails.
+    """
+    if not PSYCOPG2_AVAILABLE:
+        logger.warning("psycopg2 not available - running in CSV-only mode")
+        return None
+    try:
+        import psycopg2
+        conn = psycopg2.connect("dbname=metrics")
+        logger.info("Successfully connected to database")
+        return conn
+    except Exception as e:
+        logger.warning(f"Failed to connect to database: {e}. Running in CSV-only mode")
+        return None
+def create_global_metrics_recorder(
+    repository: str, branch: str, commit_id: str, commit_msg: str, generate_csv: bool = False
+) -> MetricsRecorder:
+    """
+    Create a global metrics recorder that will be used across all benchmarks.
+    """
+    connection = create_database_connection()
+    recorder = MetricsRecorder(connection, logger, repository, branch, commit_id, commit_msg, generate_csv)
+    # Log the storage mode
+    storage_modes = []
+    if connection is not None:
+        storage_modes.append("database")
+    if generate_csv:
+        storage_modes.append("CSV")
+    if not storage_modes:
+        logger.warning("Running benchmarks with NO data storage (no database connection, CSV disabled)")
+        logger.warning("Use --csv flag to enable CSV output when database is unavailable")
+    else:
+        logger.info(f"Running benchmarks with: {' + '.join(storage_modes)} storage")
+    return recorder
+if __name__ == "__main__":
+    benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
+    benches_folder_path = os.path.join(benchmarks_folder_path, "benches")
+    repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir = parse_arguments()
+    # Create a global metrics recorder
+    global_metrics_recorder = create_global_metrics_recorder(repository, branch, commit_id, commit_msg, generate_csv)
+    successful_benchmarks = 0
+    failed_benchmarks = 0
+    # Automatically discover all benchmark modules in benches/ folder
+    benchmark_modules = []
+    if os.path.exists(benches_folder_path):
+        logger.debug(f"Scanning for benchmarks in: {benches_folder_path}")
+        for entry in os.scandir(benches_folder_path):
+            if not entry.name.endswith(".py"):
+                continue
+            if entry.name.startswith("__"):  # Skip __init__.py, __pycache__, etc.
+                continue
+            # Check if the file has a run_benchmark function
+            try:
+                logger.debug(f"checking if benches/{entry.name} has run_benchmark function")
+                module = import_from_path(entry.name.split(".")[0], entry.path)
+                if hasattr(module, "run_benchmark"):
+                    benchmark_modules.append(entry.name)
+                    logger.debug(f"discovered benchmark: {entry.name}")
+                else:
+                    logger.debug(f"skipping {entry.name} - no run_benchmark function found")
+            except Exception as e:
+                logger.debug(f"failed to check benches/{entry.name}: {e}")
+    else:
+        logger.warning(f"Benches directory not found: {benches_folder_path}")
+    if benchmark_modules:
+        logger.info(f"Discovered {len(benchmark_modules)} benchmark(s): {benchmark_modules}")
+    else:
+        logger.warning("No benchmark modules found in benches/ directory")
+    for module_name in benchmark_modules:
+        module_path = os.path.join(benches_folder_path, module_name)
+        try:
+            logger.debug(f"loading: {module_name}")
+            module = import_from_path(module_name.split(".")[0], module_path)
+            logger.info(f"running benchmarks in: {module_name}")
+            # Check if the module has an updated run_benchmark function that accepts metrics_recorder
+            try:
+                # Try the new signature first
+                module.run_benchmark(logger, repository, branch, commit_id, commit_msg, global_metrics_recorder)
+            except TypeError:
+                # Fall back to the old signature for backward compatibility
+                logger.warning(
+                    f"Module {module_name} using old run_benchmark signature - database connection will be created per module"
+                )
+                module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
+            successful_benchmarks += 1
+        except ImportModuleException as e:
+            logger.error(e)
+            failed_benchmarks += 1
+        except Exception as e:
+            logger.error(f"error running benchmarks for {module_name}: {e}")
+            failed_benchmarks += 1
+    # Export CSV results at the end (if enabled)
+    try:
+        if generate_csv:
+            global_metrics_recorder.export_to_csv(csv_output_dir)
+            logger.info(f"CSV reports have been generated and saved to the {csv_output_dir} directory")
+        else:
+            logger.info("CSV generation disabled - no CSV files created (use --csv to enable)")
+        logger.info(f"Benchmark run completed. Successful: {successful_benchmarks}, Failed: {failed_benchmarks}")
+    except Exception as e:
+        logger.error(f"Failed to export CSV results: {e}")
+    finally:
+        global_metrics_recorder.close()
--- a/benchmark/config/generation.yaml
+++ b/benchmark/config/generation.yaml
+# Hydra-composed benchmark configuration (see the `hydra:` section at the bottom).
+# It selects the inference scenario, the process launcher and the PyTorch backend, then overrides
+# their settings below.
+defaults:
+  - benchmark # inheriting benchmark schema
+  - scenario: inference
+  - launcher: process
+  - backend: pytorch
+  - _self_ # for hydra 1.1 compatibility
+name: pytorch_generate
+# Launcher overrides: processes are started with the `spawn` method; device isolation is enabled
+# and its action is set to `warn`.
+launcher:
+  start_method: spawn
+  device_isolation: true
+  device_isolation_action: warn
+# Backend overrides: Llama-2-7b in float16 on CUDA device 0, static cache implementation,
+# compiled with torch.compile (inductor backend, reduce-overhead mode, full graph).
+# NOTE(review): `no_weights: true` presumably avoids loading the pretrained weights - confirm.
+backend:
+  device: cuda
+  device_ids: 0
+  no_weights: true
+  model: meta-llama/Llama-2-7b-hf
+  cache_implementation: static
+  torch_compile: true
+  dtype: float16
+  torch_compile_config:
+    backend: inductor
+    mode: reduce-overhead
+    fullgraph: true
+# Scenario overrides: batch of 1 with a 7-token input; both memory and latency tracking are on.
+scenario:
+  input_shapes:
+    batch_size: 1
+    sequence_length: 7
+  # min_new_tokens equals max_new_tokens, so the generated length is pinned to 128 new tokens;
+  # sampling is disabled.
+  generate_kwargs:
+    max_new_tokens: 128
+    min_new_tokens: 128
+    do_sample: false
+  memory: true
+  latency: true
+  iterations: 2
+  duration: 0
+# hydra/cli specific settings
+hydra:
+  run:
+    # where to store run results
+    dir: runs/${name}
+  job:
+    # change working directory to the run directory
+    chdir: true
+    env_set:
+      # set environment variable OVERRIDE_BENCHMARKS to 1
+      # to not skip benchmarks that have been run before
+      OVERRIDE_BENCHMARKS: 1
+      LOG_LEVEL: WARN
+  # output layout for multirun sweeps: one sub-directory per override combination
+  sweep:
+    dir: multirun
+    subdir: ${hydra.job.override_dirname}
\ No newline at end of file
--- a/benchmark/default.yml
+++ b/benchmark/default.yml
+# Dashboard provider definition: a single file-based provider named 'Transformers Benchmarks'
+# that reads dashboards from /etc/grafana/dashboards.
+apiVersion: 1
+providers:
+  - name: 'Transformers Benchmarks'
+    orgId: 1
+    type: file
+    # NOTE(review): presumably the interval at which the dashboard files are re-scanned - confirm.
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/dashboards
--- a/benchmark/grafana_dashboard.json
+++ b/benchmark/grafana_dashboard.json
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": 1,
+  "links": [
+    {
+      "asDropdown": false,
+      "icon": "external link",
+      "includeVars": false,
+      "keepTime": false,
+      "tags": [],
+      "targetBlank": false,
+      "title": "Go to data",
+      "tooltip": "Go to data",
+      "type": "link",
+      "url": "http://transformers-benchmarks.hf.co/d/fdz33iyzln9c0a/transformers-benchmarks?orgId=1&from=${StartTime}&to=${EndTime}"
+    }
+  ],
+  "liveNow": true,
+  "panels": [
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "custom": {
+            "align": "left",
+            "cellOptions": {
+              "type": "auto"
+            },
+            "inspect": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "gpu_name"
+            },
+            "properties": [
+              {
+                "id": "custom.width",
+                "value": 202
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "left"
+            },
+            "properties": [
+              {
+                "id": "custom.width",
+                "value": 407
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "commit_message"
+            },
+            "properties": [
+              {
+                "id": "custom.width",
+                "value": 524
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "commit_id"
+            },
+            "properties": [
+              {
+                "id": "custom.width",
+                "value": 353
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "model_id"
+            },
+            "properties": [
+              {
+                "id": "custom.width",
+                "value": 216
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 5,
+      "options": {
+        "cellHeight": "sm",
+        "footer": {
+          "countRows": false,
+          "fields": "",
+          "reducer": [
+            "sum"
+          ],
+          "show": false
+        },
+        "showHeader": true,
+        "sortBy": []
+      },
+      "pluginVersion": "11.2.2",
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT commit_id, commit_message, metadata->>'gpu_name' as gpu_name, metadata->>'model_id' as model_id, created_at AS date FROM benchmarks WHERE branch = '${branch}' AND metadata->>'gpu_name' = '${gpu_name}' ORDER BY benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [
+                  {
+                    "name": "commit_id",
+                    "type": "functionParameter"
+                  }
+                ],
+                "type": "function"
+              },
+              {
+                "parameters": [
+                  {
+                    "name": "gpu_name",
+                    "type": "functionParameter"
+                  }
+                ],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50,
+            "whereJsonTree": {
+              "children1": [
+                {
+                  "id": "baaa8aaa-89ab-4cde-b012-31922f96de3f",
+                  "properties": {
+                    "field": "commit_id",
+                    "fieldSrc": "field",
+                    "operator": "equal",
+                    "value": [
+                      "${commit}"
+                    ],
+                    "valueError": [
+                      null
+                    ],
+                    "valueSrc": [
+                      "value"
+                    ],
+                    "valueType": [
+                      "text"
+                    ]
+                  },
+                  "type": "rule"
+                }
+              ],
+              "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+              "type": "group"
+            },
+            "whereString": "commit_id = '${commit}'"
+          },
+          "table": "benchmarks"
+        }
+      ],
+      "transparent": true,
+      "type": "table"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 6
+      },
+      "id": 13,
+      "panels": [],
+      "title": "Eager Forward Pass",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "scheme",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 0,
+        "y": 7
+      },
+      "id": 7,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "auto",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "pluginVersion": "11.2.2",
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'first_eager_forward_pass_time_secs' AS double precision) AS first_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "First eager forward pass",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "scheme",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 12,
+        "y": 7
+      },
+      "id": 9,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "auto",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'second_eager_forward_pass_time_secs' AS double precision) AS second_eager_forward_pass_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "Second eager forward pass",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 18
+      },
+      "id": 16,
+      "panels": [],
+      "title": "Time to next token",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "scheme",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 0,
+        "y": 19
+      },
+      "id": 17,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "always",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'time_to_first_token_secs' AS double precision) AS time_to_first_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "Time to first token",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "scheme",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 12,
+        "y": 19
+      },
+      "id": 18,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "always",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'time_to_second_token_secs' AS double precision) AS time_to_second_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "Time to second token",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "scheme",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 0,
+        "y": 30
+      },
+      "id": 19,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "always",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'time_to_third_token_secs' AS double precision) AS time_to_third_token_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "Time to third token",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "scheme",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 12,
+        "y": 30
+      },
+      "id": 20,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "always",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'time_to_next_token_mean_secs' AS double precision) AS time_to_next_token_mean_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "Mean time to subsequent tokens",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 41
+      },
+      "id": 14,
+      "panels": [],
+      "title": "Compiled Generate",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "scheme",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 0,
+        "y": 42
+      },
+      "id": 8,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "always",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'first_compile_generate_time_secs' AS double precision) AS first_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "First compile generate",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "scheme",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 12,
+        "y": 42
+      },
+      "id": 10,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "auto",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'second_compile_generate_time_secs' AS double precision) AS second_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "Second compile generate",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "scheme",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 0,
+        "y": 53
+      },
+      "id": 11,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "auto",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'third_compile_generate_time_secs' AS double precision) AS third_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "Third compile generate",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "datasource": {
+        "default": true,
+        "type": "grafana-postgresql-datasource",
+        "uid": "be28nkzirtb0gd"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "continuous-YlBl"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "fillOpacity": 80,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineWidth": 0,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 11,
+        "w": 12,
+        "x": 12,
+        "y": 53
+      },
+      "id": 12,
+      "options": {
+        "barRadius": 0.05,
+        "barWidth": 0.8,
+        "fullHighlight": false,
+        "groupWidth": 0.7,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": false
+        },
+        "orientation": "auto",
+        "showValue": "auto",
+        "stacking": "none",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        },
+        "xTickLabelRotation": 0,
+        "xTickLabelSpacing": 0
+      },
+      "targets": [
+        {
+          "datasource": {
+            "default": true,
+            "type": "grafana-postgresql-datasource",
+            "uid": "be28nkzirtb0gd"
+          },
+          "editorMode": "code",
+          "format": "table",
+          "rawQuery": true,
+          "rawSql": "SELECT CAST(m.measurements->'fourth_compile_generate_time_secs' AS double precision) AS fourth_compile_generate_time_secs, left(b.commit_id, 7), m.time FROM benchmarks as b JOIN model_measurements AS m ON b.benchmark_id = m.benchmark_id WHERE b.branch = '${branch}' AND b.metadata->>'gpu_name' = '${gpu_name}' ORDER BY b.benchmark_id DESC LIMIT ${last_n_commits};",
+          "refId": "A",
+          "sql": {
+            "columns": [
+              {
+                "parameters": [],
+                "type": "function"
+              }
+            ],
+            "groupBy": [
+              {
+                "property": {
+                  "type": "string"
+                },
+                "type": "groupBy"
+              }
+            ],
+            "limit": 50
+          }
+        }
+      ],
+      "title": "Fourth compile generate",
+      "transformations": [
+        {
+          "id": "sortBy",
+          "options": {
+            "fields": {},
+            "sort": [
+              {
+                "field": "time"
+              }
+            ]
+          }
+        }
+      ],
+      "transparent": true,
+      "type": "barchart"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 64
+      },
+      "id": 15,
+      "panels": [
+        {
+          "datasource": {},
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": 60000,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "percent"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 65
+          },
+          "id": 1,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "default": true,
+                "type": "grafana-postgresql-datasource",
+                "uid": "be28nkzirtb0gd"
+              },
+              "editorMode": "code",
+              "format": "table",
+              "rawQuery": true,
+              "rawSql": "SELECT\n  d.cpu_util,\n  d.time\nFROM\n  benchmarks AS b\n  JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id\nWHERE\n  branch = '${branch}';",
+              "refId": "A",
+              "sql": {
+                "columns": [
+                  {
+                    "parameters": [
+                      {
+                        "name": "cpu_util",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "mem_megabytes",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "gpu_util",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "gpu_mem_megabytes",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "\"time\"",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  }
+                ],
+                "groupBy": [
+                  {
+                    "property": {
+                      "type": "string"
+                    },
+                    "type": "groupBy"
+                  }
+                ],
+                "limit": 50,
+                "whereJsonTree": {
+                  "children1": [
+                    {
+                      "id": "baa888b8-89ab-4cde-b012-31922f8671e9",
+                      "properties": {
+                        "field": "commit_id",
+                        "fieldSrc": "field",
+                        "operator": "equal",
+                        "value": [
+                          "${commit}"
+                        ],
+                        "valueError": [
+                          null
+                        ],
+                        "valueSrc": [
+                          "value"
+                        ],
+                        "valueType": [
+                          "text"
+                        ]
+                      },
+                      "type": "rule"
+                    }
+                  ],
+                  "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+                  "type": "group"
+                },
+                "whereString": "commit_id = '${commit}'"
+              },
+              "table": "measurements"
+            }
+          ],
+          "title": "CPU Utilization",
+          "transparent": true,
+          "type": "timeseries"
+        },
+        {
+          "datasource": {},
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": 60000,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "percent"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 65
+          },
+          "id": 4,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "default": true,
+                "type": "grafana-postgresql-datasource",
+                "uid": "be28nkzirtb0gd"
+              },
+              "editorMode": "code",
+              "format": "table",
+              "rawQuery": true,
+              "rawSql": "SELECT\n  b.commit_id,\n  d.gpu_util,\n  d.time\nFROM\n  benchmarks AS b\n  JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id\nWHERE\n  branch = '${branch}';",
+              "refId": "A",
+              "sql": {
+                "columns": [
+                  {
+                    "parameters": [
+                      {
+                        "name": "cpu_util",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "mem_megabytes",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "gpu_util",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "gpu_mem_megabytes",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "\"time\"",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  }
+                ],
+                "groupBy": [
+                  {
+                    "property": {
+                      "type": "string"
+                    },
+                    "type": "groupBy"
+                  }
+                ],
+                "limit": 50,
+                "whereJsonTree": {
+                  "children1": [
+                    {
+                      "id": "baa888b8-89ab-4cde-b012-31922f8671e9",
+                      "properties": {
+                        "field": "commit_id",
+                        "fieldSrc": "field",
+                        "operator": "equal",
+                        "value": [
+                          "${commit}"
+                        ],
+                        "valueError": [
+                          null
+                        ],
+                        "valueSrc": [
+                          "value"
+                        ],
+                        "valueType": [
+                          "text"
+                        ]
+                      },
+                      "type": "rule"
+                    }
+                  ],
+                  "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+                  "type": "group"
+                },
+                "whereString": "commit_id = '${commit}'"
+              },
+              "table": "measurements"
+            }
+          ],
+          "title": "GPU Utilization",
+          "transparent": true,
+          "type": "timeseries"
+        },
+        {
+          "datasource": {},
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": 60000,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "decmbytes"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 74
+          },
+          "id": 2,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "default": true,
+                "type": "grafana-postgresql-datasource",
+                "uid": "be28nkzirtb0gd"
+              },
+              "editorMode": "code",
+              "format": "table",
+              "rawQuery": true,
+              "rawSql": "SELECT d.mem_megabytes, d.time FROM benchmarks AS b JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id WHERE branch = '${branch}';",
+              "refId": "A",
+              "sql": {
+                "columns": [
+                  {
+                    "parameters": [
+                      {
+                        "name": "cpu_util",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "mem_megabytes",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "gpu_util",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "gpu_mem_megabytes",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "\"time\"",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  }
+                ],
+                "groupBy": [
+                  {
+                    "property": {
+                      "type": "string"
+                    },
+                    "type": "groupBy"
+                  }
+                ],
+                "limit": 50,
+                "whereJsonTree": {
+                  "children1": [
+                    {
+                      "id": "baa888b8-89ab-4cde-b012-31922f8671e9",
+                      "properties": {
+                        "field": "commit_id",
+                        "fieldSrc": "field",
+                        "operator": "equal",
+                        "value": [
+                          "${commit}"
+                        ],
+                        "valueError": [
+                          null
+                        ],
+                        "valueSrc": [
+                          "value"
+                        ],
+                        "valueType": [
+                          "text"
+                        ]
+                      },
+                      "type": "rule"
+                    }
+                  ],
+                  "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+                  "type": "group"
+                },
+                "whereString": "commit_id = '${commit}'"
+              },
+              "table": "measurements"
+            }
+          ],
+          "title": "Memory usage",
+          "transparent": true,
+          "type": "timeseries"
+        },
+        {
+          "datasource": {},
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": 60000,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "decmbytes"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 74
+          },
+          "id": 3,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "default": true,
+                "type": "grafana-postgresql-datasource",
+                "uid": "be28nkzirtb0gd"
+              },
+              "editorMode": "code",
+              "format": "table",
+              "rawQuery": true,
+              "rawSql": "SELECT\n  d.gpu_mem_megabytes,\n  d.time\nFROM\n  benchmarks AS b\n  JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id\nWHERE\n  branch = '${branch}';",
+              "refId": "A",
+              "sql": {
+                "columns": [
+                  {
+                    "parameters": [
+                      {
+                        "name": "cpu_util",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "mem_megabytes",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "gpu_util",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "gpu_mem_megabytes",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  },
+                  {
+                    "parameters": [
+                      {
+                        "name": "\"time\"",
+                        "type": "functionParameter"
+                      }
+                    ],
+                    "type": "function"
+                  }
+                ],
+                "groupBy": [
+                  {
+                    "property": {
+                      "type": "string"
+                    },
+                    "type": "groupBy"
+                  }
+                ],
+                "limit": 50,
+                "whereJsonTree": {
+                  "children1": [
+                    {
+                      "id": "baa888b8-89ab-4cde-b012-31922f8671e9",
+                      "properties": {
+                        "field": "commit_id",
+                        "fieldSrc": "field",
+                        "operator": "equal",
+                        "value": [
+                          "${commit}"
+                        ],
+                        "valueError": [
+                          null
+                        ],
+                        "valueSrc": [
+                          "value"
+                        ],
+                        "valueType": [
+                          "text"
+                        ]
+                      },
+                      "type": "rule"
+                    }
+                  ],
+                  "id": "bab88a98-0123-4456-b89a-b1922f7d4f11",
+                  "type": "group"
+                },
+                "whereString": "commit_id = '${commit}'"
+              },
+              "table": "measurements"
+            }
+          ],
+          "title": "GPU memory usage",
+          "transparent": true,
+          "type": "timeseries"
+        }
+      ],
+      "title": "Usage metrics",
+      "type": "row"
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "selected": false,
+          "text": "main",
+          "value": "main"
+        },
+        "datasource": {
+          "default": true,
+          "type": "grafana-postgresql-datasource",
+          "uid": "be28nkzirtb0gd"
+        },
+        "definition": "SELECT DISTINCT branch FROM benchmarks;",
+        "description": "",
+        "hide": 0,
+        "includeAll": false,
+        "label": "branch",
+        "multi": false,
+        "name": "branch",
+        "options": [],
+        "query": "SELECT DISTINCT branch FROM benchmarks;",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 0,
+        "type": "query"
+      },
+      {
+        "current": {
+          "selected": false,
+          "text": "1729701492845",
+          "value": "1729701492845"
+        },
+        "datasource": {
+          "default": true,
+          "type": "grafana-postgresql-datasource",
+          "uid": "be28nkzirtb0gd"
+        },
+        "definition": "SELECT created_at - INTERVAL '5 secs' FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id ASC LIMIT 1;",
+        "description": "",
+        "hide": 2,
+        "includeAll": false,
+        "multi": false,
+        "name": "StartTime",
+        "options": [],
+        "query": "SELECT created_at - INTERVAL '5 secs' FROM benchmarks WHERE branch = '${branch}' ORDER BY benchmark_id ASC LIMIT 1;",
+        "refresh": 2,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 0,
+        "type": "query"
+      },
+      {
+        "current": {
+          "selected": false,
+          "text": "1730393397577",
+          "value": "1730393397577"
+        },
+        "datasource": {
+          "default": true,
+          "type": "grafana-postgresql-datasource",
+          "uid": "be28nkzirtb0gd"
+        },
+        "definition": "SELECT time + INTERVAL '5 secs' FROM benchmarks AS b JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id WHERE branch = '${branch}' ORDER BY b.benchmark_id DESC, d.measurement_id DESC LIMIT 1;",
+        "description": "",
+        "hide": 2,
+        "includeAll": false,
+        "multi": false,
+        "name": "EndTime",
+        "options": [],
+        "query": "SELECT time + INTERVAL '5 secs' FROM benchmarks AS b JOIN device_measurements AS d ON b.benchmark_id = d.benchmark_id WHERE branch = '${branch}' ORDER BY b.benchmark_id DESC, d.measurement_id DESC LIMIT 1;",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 0,
+        "type": "query"
+      },
+      {
+        "current": {
+          "selected": false,
+          "text": "NVIDIA A10G",
+          "value": "NVIDIA A10G"
+        },
+        "datasource": {
+          "type": "grafana-postgresql-datasource",
+          "uid": "be28nkzirtb0gd"
+        },
+        "definition": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
+        "description": "",
+        "hide": 0,
+        "includeAll": false,
+        "label": "GPU",
+        "multi": false,
+        "name": "gpu_name",
+        "options": [],
+        "query": "SELECT DISTINCT metadata->>'gpu_name' FROM benchmarks;",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 0,
+        "type": "query"
+      },
+      {
+        "current": {
+          "selected": true,
+          "text": "10",
+          "value": "10"
+        },
+        "description": "The number of commits to display, going from most recent to the nth commit.",
+        "hide": 0,
+        "label": "Last # of commits",
+        "name": "last_n_commits",
+        "options": [
+          {
+            "selected": true,
+            "text": "10",
+            "value": "10"
+          }
+        ],
+        "query": "10",
+        "skipUrlSync": false,
+        "type": "textbox"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {
+    "hidden": false
+  },
+  "timezone": "browser",
+  "title": "Transformers benchmarks",
+  "uid": "fdz33iyzln9c0a",
+  "version": 10,
+  "weekStart": ""
+}
--- a/benchmark/grafana_datasource.yaml
+++ b/benchmark/grafana_datasource.yaml
+apiVersion: 1
+datasources:
+  - name: grafana-postgresql-datasource
+    uid: be28nkzirtb0gd
+    type: postgres
+    url: $GRAFANA_POSTGRES_DATASOURCE_URL
+    user: $GRAFANA_POSTGRES_DATASOURCE_USER
+    secureJsonData:
+      password: $GRAFANA_POSTGRES_DATASOURCE_PWD
+    jsonData:
+      database: metrics
+      maxOpenConns: 100
+      maxIdleConns: 100
+      maxIdleConnsAuto: true
+      connMaxLifetime: 14400
+      postgresVersion: 1000
+      timescaledb: false